library(meffil)
#options(mc.cores=1)
library(tidyverse)
library(readxl)
library(rebus)
library(ewastools)
library(kableExtra)
library(gtools)
library(rlist)
library(broom)
library(lmtest)
library(sandwich)
library(scattermore)
library(janitor)
library(PCAtools)
library(corrplot)
library(GGally)
library(prediction)
library(ggVennDiagram)
library(ggpubr)
library(performance)
library(sesame)
library(parallel)
library(doParallel)
library(ComplexHeatmap)
library(circlize)
library(betareg)
library(table1)
library(DT)
library(DNAmArray)
library(DiagrammeR)
library(rebus)
library(data.table)
mutate <- dplyr::mutate
select <- dplyr::select
rename <- dplyr::rename
or <- rebus::or
library(here)
Samples marked for dropping will be dropped just after ComBat for EWAS analyses and filtered after uploading to the clock website for epigenetic age analyses.
# write.csv(df5, file = "congo_ewas_pheno_20221120.csv",
# row.names = FALSE)
df5 <- read.csv(file = here("data","congo_ewas_pheno_20221120.csv"))
# Get a vector of samples that correspond to sample mix-ups, accidental
# duplicates collected at different times,
# siblings to remove, etc. These are in the "drop_sample" column. These
# were identified in initial sample quality control.
droppers <- df5 %>%
filter(drop_sample == TRUE) %>%
pull(methylation_id)
# Dropped a total of 140 samples there.496-140 = 356 idat files
cat("We have",nrow(df5),"pairs of idat files and",
length(unique(paste0(df5$dyad,df5$num,df5$tissue))) - 8,
"unique individuals.")
## We have 356 pairs of idat files and 324 unique individuals.
bdf <- df5 %>%
filter(tissue == c("baby_venous_blood")) %>%
rename(mom_age = "age") %>%
mutate(Age = 0)
mdf <- df5 %>%
filter(tissue == c("mother_venous_blood")) %>%
mutate(Sex = c("F")) %>%
mutate(Age = age)
# 177 babies and 179 mothers.
# meffil.list.cell.type.references()
# [1] "andrews and bakulski cord blood" "blood gse35069"
# [3] "blood gse35069 chen" "blood gse35069 complete"
# [5] "blood idoloptimized" "blood idoloptimized epic"
# [7] "combined cord blood" "cord blood gse68456"
# [9] "gervin and lyle cord blood" "guintivano dlpfc"
# [11] "saliva gse48472"
if(file.exists(here("output","qc.objects.baby.Robj"))) {
load(here("output","qc.objects.baby.Robj"))
} else {
qc.objects.baby <- meffil.qc(bdf, cell.type.reference="combined cord blood", verbose=TRUE)
save(qc.objects.baby,file = here("output","qc.objects.baby.Robj"))
}
if(file.exists(here("output","qc.objects.mother.Robj"))) {
load(here("output","qc.objects.mother.Robj"))
} else {
qc.objects.mother <- meffil.qc(mdf, cell.type.reference="blood idoloptimized epic", verbose=TRUE)
save(qc.objects.mother,file=here("output","qc.objects.mother.Robj"))
}
qc.parameters <- meffil.qc.parameters(
beadnum.samples.threshold = 0.1,
detectionp.samples.threshold = 0.1,
detectionp.cpgs.threshold = 0.1,
beadnum.cpgs.threshold = 0.1,
sex.outlier.sd = 5
)
# Get quality control summary reports
if(file.exists(here("output","qcsummary.baby.Robj"))) {
load(here("output","qcsummary.baby.Robj"))
} else {
qc.summary.baby <- meffil.qc.summary(
qc.objects.baby,
parameters = qc.parameters
)
save(qc.summary.baby, file = here("output","qcsummary.baby.Robj"))
meffil.qc.report(qc.summary.baby,
output.file = here("output","qc.report.baby.html"))
}
if(file.exists(here("output","qcsummary.mother.Robj"))){
load(here("output","qcsummary.mother.Robj"))
} else {
qc.summary.mother <- meffil.qc.summary(
qc.objects.mother,
parameters = qc.parameters
)
save(qc.summary.mother, file = here("output","qcsummary.mother.Robj"))
meffil.qc.report(qc.summary.mother,
output.file = here("output","qc.report.mother.html"))
}
# List the quality control outliers
# for babies:
outlier.baby <- qc.summary.baby$bad.samples
table(outlier.baby$issue)
##
## Control probe (extension.G.12719506) Control probe (extension.G.74666473)
## 1 1
## Control probe (extension.R.21752326) Control probe (extension.R.63642461)
## 1 1
## Control probe (hybe.21771417) Control probe (hybe.39782321)
## 2 1
## Control probe (nonpoly.G.23663352) Control probe (nonpoly.G.38796356)
## 1 1
## Control probe (normC) Control probe (normG)
## 1 1
## Control probe (oob.G.99%) Control probe (spec1.G.51804467)
## 1 1
## Control probe (spec1.ratio) Control probe (spec1.ratio1)
## 1 1
## Control probe (spec1.ratio2) Control probe (spec2.ratio)
## 1 1
## Detection p-value Methylated vs Unmethylated
## 1 2
## Sex mismatch X-Y ratio outlier
## 5 1
index.baby <- outlier.baby$issue %in% c("Control probe (dye.bias)",
"Methylated vs Unmethylated",
"X-Y ratio outlier",
"Low bead numbers",
"Detection p-value",
"Sex mismatch",
"Control probe (bisulfite1)",
"Control probe (bisulfite2)")
outlier.baby <- outlier.baby[index.baby,]
# For mothers:
outlier.mother <- qc.summary.mother$bad.samples
table(outlier.mother$issue)
##
## Control probe (hybe.21771417) Control probe (spec1.ratio)
## 1 1
## Control probe (spec1.ratio1) Methylated vs Unmethylated
## 1 3
## Sex mismatch
## 1
index.mother <- outlier.mother$issue %in% c("Control probe (dye.bias)",
"Methylated vs Unmethylated",
"X-Y ratio outlier",
"Low bead numbers",
"Detection p-value",
"Sex mismatch",
"Control probe (bisulfite1)",
"Control probe (bisulfite2)")
outlier.mother <- outlier.mother[index.mother,]
cc.baby <- t(sapply(qc.objects.baby, function(obj) obj$cell.counts$counts))
cc.baby <- data.frame(IID=row.names(cc.baby),cc.baby)
write.csv(cc.baby, file = here("output","congo_baby_cell_composition_20220613.csv"))
cc.mother <- t(sapply(qc.objects.mother, function(obj) obj$cell.counts$counts))
cc.mother <- data.frame(IID=row.names(cc.mother),cc.mother)
write.csv(cc.mother, file = here("output","congo_mother_cell_composition_20220613.csv"))
Use ewastools package for this.
if(file.exists(here("output","snps.RDS"))){
snps <- readRDS(file = here("output","snps.RDS"))
} else {
meth <- read_idats(df5$Basename,quiet = FALSE) # This includes all mother and
# baby samples at birth of the correct tissue type.
detectionP <- ewastools::detectionP
mask <- ewastools::mask
correct_dye_bias <- ewastools::correct_dye_bias
dont_normalize <- ewastools::dont_normalize
# get betas
beta <- meth %>% detectionP %>% mask(0.01) %>% correct_dye_bias %>% dont_normalize
# get snps
snps <- meth$manifest[probe_type=="rs",index]
snps <- beta[snps,]
saveRDS(snps, file = here("output","snps.RDS"))
rm(meth)
rm(beta)
}
genotypes <- call_genotypes(snps,learn=FALSE)
df5$outlier <- snp_outliers(genotypes)
contam <- df5[df5$outlier > -4,c("Sample_Name","outlier")]
contam %>% kbl(caption="Samples identified as contaminated by ewastools") %>% kable_styling("hover",full_width=F)
| Sample_Name | outlier | |
|---|---|---|
| 260 | SV001b.b.1.2 | -3.463359 |
| 293 | SV018M.1.1 | -3.904566 |
Get noob corrected betas for the epigenetic clock.
if(file.exists(here("data", "congo_mothers_and_babies_noob_betas.rds"))) {
noobBetas <- readRDS(file = here("data", "congo_mothers_and_babies_noob_betas.rds"))
} else {
cl <- makeCluster(16)
registerDoParallel(cl)
clusterExport(cl, c("readIDATpair","noob","getBetas"))
noobBetas <- do.call(cbind, parLapply(cl,df5$Basename, function(pfx) {
getBetas(noob(readIDATpair(pfx)), mask = F)
}))
stopCluster(cl)
colnames(noobBetas) <- df5$methylation_id
saveRDS(noobBetas, file = here("data","congo_mothers_and_babies_noob_betas.rds"))
}
df6 <- df5 %>%
mutate(Age = ifelse(tissue == "baby_venous_blood",0,age)) %>%
mutate(Female = ifelse(tissue == "mother_venous_blood",1,Female))
clock <- df6 %>%
select(methylation_id,
slide_position,
Age,
Tissue,
Female)
# Reformat the noob betas object obtained in the chunk above this one
# and filter them for the probes needed for the epigenetic clock.
noobBetas <- as.data.frame(noobBetas)
noobBetas$Name <- rownames(noobBetas)
probes <- read.csv(here("supp_data","datMiniAnnotation3.csv"))
# get betas for Horvath's probe list
fil.noob.betas <- merge(probes, noobBetas, by = "Name", all.x = TRUE, all.y = FALSE)
nrow(fil.noob.betas) # 30084 is what we need
fil.noob.betas[1:10,1:10]
# remove extra columns
fil.noob.betas <- fil.noob.betas[-c(2:7)]
colnames(fil.noob.betas)[1] <- "ProbeID"
cn <- colnames(fil.noob.betas[-1])
link <- match(cn,clock$methylation_id)
cc2 <- clock[link,]
identical(colnames(fil.noob.betas[-1]),cc2$methylation_id)
write.csv(cc2,
file = here("data","epigenetic_clock_samplesheet.csv"),
row.names = F)
write.csv(fil.noob.betas,
row.names = F,
file = here("data","epigenetic_clock_betas.csv"))
# Read in output from Horvath's website
mage <- read.csv(here("data","epigenetic_clock_betas.output.csv"))
# Create a data frame of problematic samples by the epigenetic clock
# quality control checks
gs <- mage %>%
filter(corSampleVSgoldstandard < 0.80) %>%
select(SampleID)
clock_sex_checks <- mage %>%
mutate(problem_sex = case_when(
predictedGender == "female" & Female == 1 ~ FALSE,
predictedGender == "male" & Female == 0 ~ FALSE,
predictedGender == "female" & Female == 0 ~ TRUE,
predictedGender == "male" & Female == 1 ~ TRUE,
predictedGender == "Unsure" ~ TRUE)) %>%
filter(problem_sex == TRUE)
# write function to average replicates
avgReps <- function(repsToAverage){
dr <- mage[mage$methylation_id %in% repsToAverage,]
row <- dr %>%
summarise_if(is.numeric, mean, na.rm = TRUE)
x <- smartbind(dr,row,fill="replaceme")
dex <- grep("replaceme",x[nrow(x), ])
fillers <- x[nrow(x) - 1, dex]
fin <- replace(x[nrow(x), ], dex, fillers)
return(fin[nrow(fin),])
}
# Get replicate status and subset for all replicates.
# Use this to get list of which samples to average.
rav <- df6 %>%
select(methylation_id,replicate,dyad,tissue) %>%
filter(replicate == TRUE) %>%
inner_join(mage, by = c("methylation_id" = "methylation_id"))%>%
group_by(dyad,tissue) %>%
select(methylation_id)
toAverage <- by(rav$methylation_id,rav$dyad,print)
# average by replicates and store the new
# rows in a data frame
rep1 <- avgReps(toAverage$C1)
rep2 <- avgReps(toAverage$C12)
rep3 <- avgReps(toAverage$C20)
rep4 <- avgReps(toAverage$C29)
rep5 <- avgReps(toAverage$C49[c(1,5)])
rep6 <- avgReps(toAverage$C49[c(2:4)])
rep7 <- avgReps(toAverage$C5)
rep8 <- avgReps(toAverage$C62)
rep9 <- avgReps(toAverage$C89[c(1,3)])
rep10 <- avgReps(toAverage$C89[c(2,4)])
rep11 <- avgReps(toAverage$C91)
rep12 <- avgReps(toAverage$SV11)
rep13 <- avgReps(toAverage$SV51)
rep14 <- avgReps(toAverage$SV61)
rep15 <- avgReps(toAverage$SV67)
rep16 <- avgReps(toAverage$SV7)
rep17 <- avgReps(toAverage$SV76)
averaged_reps <- bind_rows(rep1,rep2,rep3,rep4,rep5,rep6,
rep7,rep8,rep9,rep10,rep11,
rep12,rep13,rep14,rep15,rep16,rep17)
# remove replicates rows from original epigenetic age
# data frame and then
# Add back in the rows with the average replicate
# values. For character vectors, such as slide
# and position on slide, just selecting a single
# value because these cannot be averaged. That's
# fine as long as we're only using numeric variables.
mage2 <- mage %>%
filter(!(methylation_id %in% rav$methylation_id)) %>%
bind_rows(averaged_reps)
cat("There were",length(rav$methylation_id),"replicate samples total out of",
nrow(mage),"samples in epigenetic age analyses, corresponding to",
nrow(averaged_reps),"unique individuals for whom we had replicates samples.")
cat("These were removed from the data, averaged by replicate, and
then added back into the data, leaving",
nrow(mage2),"samples.")
# Samples failing QC checks should be removed
# at a later point, after considering all
# measures.
# Finally, create a wide data set so that
# each row represents a single dyad. Subset
# mothers and babies into separate data
# frames, and then column bind them adding
# suffixes to each column name according
# to which data frame that column is from.
mage3 <- df6 %>%
select(-Tissue,-Female,-Age,-slide_position) %>%
inner_join(mage2, by = c("methylation_id" = "methylation_id"))
write.csv(mage3, file = here("output","congo_epigenetic_clock_20220706.csv"))
mage3_baby <- mage3 %>%
filter(tissue == "baby_venous_blood")
mage3_mother <- mage3 %>%
filter(tissue == "mother_venous_blood")
setdiff(mage3_baby$dyad,mage3_mother$dyad)
setdiff(mage3_mother$dyad,mage3_baby$dyad)
mage4 <- mage3_baby %>%
full_join(mage3_mother, by = c("dyad" = "dyad"), suffix = c("_baby","_mother"))
write.csv(mage4, file = here("output","congo_epigenetic_clock_wide_20220706.csv"))
# Illumina's EPIC manifest
illumina <- read.csv(here("supp_data",
"infinium-methylationepic-v-1-0-b5-manifest-file.csv"))
illumina2 <- illumina %>%
select(IlmnID,UCSC_RefGene_Group) %>%
rename(probeID = IlmnID,gene_context = UCSC_RefGene_Group)
# Get the probes to mask
zhou1 <- read.table(file = here("supp_data","EPIC.hg38.manifest.pop.tsv"),
sep = '\t', header = TRUE)
# Get the cpg island context and distance to transcription start site columns
zhou2 <- read.table(file = here("supp_data","EPIC.hg38.manifest.gencode.v36.tsv"), sep = '\t', header = TRUE)
zhou3 <- read.table(file = here("supp_data","EPIC.hg38.manifest.tsv"),
sep = '\t', header = TRUE)
zhou <- zhou1 %>%
select(probeID,MASK_general_AFR) %>%
left_join(zhou3, by = c("probeID" = "probeID"))
# Get probes to mask for quality reasons plus
# high snp frequencies near the extension base
# for the African super population from 1000
# Genomes Project
mask <- zhou %>%
filter(MASK_general_AFR == TRUE) %>%
select(probeID)
#
#
if (file.exists(here("data","congo_mothers_and_babies_sesame_betas_20220630.rds"))){
betas <- readRDS(here("data","congo_mothers_and_babies_sesame_betas_20220630.rds"))
} else {
cl <- makeCluster(16)
registerDoParallel(cl)
clusterExport(cl, c("readIDATpair","noob","getBetas","dyeBiasNL",
"pOOBAH","getBetas","addMask","openSesame","mask",
"inferInfiniumIChannel", "prefixMaskButCG"))
betas <- do.call(cbind,
parLapply(cl,df6$Basename, function(pfx) {
getBetas(
noob(
pOOBAH(
dyeBiasNL(
inferInfiniumIChannel(
addMask(
prefixMaskButCG(
readIDATpair(pfx)), probes = mask$probeID))))))
}))
#
stopCluster(cl)
#
colnames(betas) <- df6$methylation_id
saveRDS(betas, file = here("data","congo_mothers_and_babies_sesame_betas_20220630.rds"))
}
if (file.exists(here("data","congo_mothers_and_babies_raw_betas_20220630.rds"))){
raw_betas <- readRDS(here("data","congo_mothers_and_babies_raw_betas_20220630.rds"))
} else {
cl <- makeCluster(16)
registerDoParallel(cl)
clusterExport(cl, c("readIDATpair","noob","getBetas","getBetas","addMask","mask",
"inferInfiniumIChannel","prefixMaskButCG"))
raw_betas <- do.call(cbind,
parLapply(cl,df6$Basename, function(pfx) {
getBetas(
addMask(
prefixMaskButCG(
readIDATpair(pfx)), probes = mask$probeID))
}))
stopCluster(cl)
colnames(raw_betas) <- df6$methylation_id
saveRDS(raw_betas, file = here("data","congo_mothers_and_babies_raw_betas_20220630.rds"))
}
########################################################################
### ###
### REMOVING ZERO INTENSITY PROBES ###
### ###
########################################################################
### From the DNAmArray R Package: https://urldefense.proofpoint.com/v2/url?u=https-3A__github.com_molepi_DNAmArray&d=DwIGAg&c=sJ6xIWYx-zLMB3EPkvcnVg&r=sWbu0rLU_L4ssDfYmHby1mWqmTG4ExdsuRnL2LYjxa8&m=zYM2gm7K2prO9Jt7dxhNG1vFdBYlyEwJ_EJaDAcASlI&s=socmXYQLInnwM7c0fOpTC5UoKzpeHv_0xg5tgJ8KYoY&e=
# This is all to get the zeroIntensityProbes. If you have them,
# load them. Otherwise start the chunk and get a coffee.
if (file.exists(here("output","zeroIntensityProbes.rds"))){
zeroIntensityProbes <- readRDS(here("output","zeroIntensityProbes.rds"))
} else {
# Make a combined samplesheet to read into the targets function
batch1 <- read.csv(here("data","samplesheet_april2016.csv")) %>%
mutate(Sample_Plate = as.character(Sample_Plate))
batch2 <- read.csv(here("data","samplesheet_aug2016.csv"))
batch3 <- read.csv(here("data","samplesheet_jan2019.csv"))
batch4 <- read.csv(here("data","samplesheet_jan2021.csv"))
batch5 <- read.csv(here("data","samplesheet_june2016.csv"))
batch6 <- read.csv(here("data","samplesheet_march2021.csv"))
batches <- bind_rows(batch1,batch2,batch3,batch4,batch5,batch6) %>%
filter(Sample_Name %in% df6$core_id)
write.csv(batches,
here("data","combined_samplesheet_zero_intensity.csv"),
row.names = F)
# Generate targets object
targets <- read.metharray.sheet(here("data"),
pattern = c("combined_samplesheet_zero_intensity.csv"))
# get an "RGChannelSetExtended" object. They parallelize this
# in the vignette but I purposefully do not. Ironically, it
# breaks on the cluster, which is made for parallelization...
RGset2 <- read.metharray.exp(targets = targets,
verbose = FALSE,
extended=TRUE,
force = TRUE)
# Read idats into a DNAmArray "RGChannelSetExtended" object
probeFiltering <- function(RGset, cutbead=3, zeroint=TRUE, verbose=TRUE){
if(class(RGset) != "RGChannelSetExtended")
stop("RGset should be of class 'RGChannelSetExtended' in order to perform filtering on number of beads!")
##Filter on number of beads
if(verbose)
cat("Filtering on number of beads... \n")
beadmat <- getNBeads(RGset)
idBeadmat <- beadmat < cutbead
##beadmat[idBeadmat] <- NA
if(verbose)
cat("On average", round(100*sum(idBeadmat)/prod(dim(idBeadmat)), 2),"% of the probes (",nrow(idBeadmat),") have number of beads below", cutbead, "\n")
##Filter on Red and Green intensity <1
if(zeroint) {
if(verbose)
cat("Filtering on zero intensities... \n")
Grn <- getGreen(RGset)
Red <- getRed(RGset)
##determine if Grn and/or Red intensities of type II probes are <1
idT2 <- Grn[getProbeInfo(RGset, type = "II")$AddressA,] < 1 | Red[getProbeInfo(RGset, type = "II")$AddressA,] < 1
##determine if either Grn or Red intensities of Type I probes are <1
idT1Grn <- Grn[c(getProbeInfo(RGset, type = "I-Green")$AddressA,
getProbeInfo(RGset, type = "I-Green")$AddressB),] < 1
idT1Red <- Red[c(getProbeInfo(RGset, type = "I-Red")$AddressA,
getProbeInfo(RGset, type = "I-Red")$AddressB),] < 1
if(verbose) {
cat("On average", round(100*sum(idT2)/prod(dim(idT2)), 3),"% of the Type II probes (",nrow(idT2),") have Red and/or Green intensity below 1 \n")
cat("On average", round(100*sum(idT1Grn)/prod(dim(idT1Grn)), 3),"% of the Type I probes (",nrow(idT1Grn),"), measured in Green channel, have intensity below 1 \n")
cat("On average", round(100*sum(idT1Red)/prod(dim(idT1Red)), 3),"% of the Type I probes (",nrow(idT1Red),"), measured in Red channel, have intensity below 1 \n")
}
}
##combine all filtered results and set NA in Red and/or Green channels
Red[idBeadmat] <- Grn[idBeadmat] <- NA
if(zeroint) {
if(verbose){
cat("Set filtered probes in Red and/or Green channels to NA... \n")
}
for(i in 1:ncol(RGset)) {
if(verbose & i%%100 == 0)
cat("... done ",i," out of ",ncol(RGset)," ... \n")
idRed <- c(names(which(idT2[,i])), names(which(idT1Red[,i])))
midRed <- match(idRed, rownames(Red))
Red[midRed, i] <- NA
idGrn <- c(names(which(idT2[,i])), names(which(idT1Grn[,i])))
midGrn <- match(idGrn, rownames(Grn))
Grn[midGrn, i] <- NA
}
}
RGChannelSet(Green = Grn, Red = Red,
colData = colData(RGset),
annotation = annotation(RGset))
}
tempfilteringRGset <- probeFiltering(RGset2,cutbead=3,zeroint=TRUE)
rm(RGset2)
tempbetas2 <- getBeta(preprocessRaw(tempfilteringRGset))
rm(tempfilteringRGset)
# get tally of samples for which a given probe is NA
gna <- rowSums(is.na(tempbetas2))
gna2 <- cbind.data.frame(rownames(tempbetas2),gna)
# get probe names for probes that either had zero intensity or
# less than three beads in > 10% of all samples.
zeroIntensityProbes <- gna2[gna2$gna > ncol(tempbetas2)*0.1,]$`rownames(tempbetas2)`
length(zeroIntensityProbes) # 987 zero intensity probes.
saveRDS(zeroIntensityProbes, file = here("output","zeroIntensityProbes.rds"))
# clear up some space
rm(tempbetas2)
rm(gna)
rm(gna2)
}
Description of probe attrition up to this point and set zeroIntensity probes to NA in preprocessed betas objects.
# Probe attrition starts here, with the sum
# of probes that are marked NA by initial
# masking, which set "rs" and "ch" probes to NA
# and the probes indicated for masking for quality
# reasons, and the probes indicated for masking
# for allele frequencies of SNPs in the African
# super population:
table(apply(raw_betas, 1, function (x) sum(is.na(x))))
# 0 356 Number of Participants
# 739396 127157 Number of Probes set to NA
# Are any of these non-CpG probes identified as
# zero intensity?
checker <- apply(raw_betas, 1, function (x) sum(is.na(x)))
checker <- names(checker[checker==ncol(raw_betas)])
table(checker %in% zeroIntensityProbes)
# Yes, 106 probes are overlapping. That means
# we have 987 - 106 probes that are uniquely masked
# for zero intensity reasons, minus a single probe
# in zero intensity object that is not in the rownames
# of the raw betas object: 987 - 106 - 1 = 880.
# How many rows are all NA for probes after preprocessing but
# before adding NA for zero intensity probes?
prena <- apply(betas, 1, function (x) sum(is.na(x)))
# run table(prena) to see that 128,238 probes are NA for everyone.
# 127,157 were initially masked because they were not CpG probes,
# but some other type like "rs" or "ch" probes, or they did not
# pass the two masking filters (cg probes only and no SNP problems
# with a probe for the African super population).
# 128,238 - 127,157 = 1081 probes failed in all samples upon
# initial preprocessing.
betas[rownames(betas) %in% zeroIntensityProbes==TRUE,] <- NA
# how many probes are NA for everyone now?
postna <- apply(betas, 1, function (x) sum(is.na(x)))
table(postna)
probes_na <- names(postna[postna==ncol(betas)])
table(zeroIntensityProbes %in% probes_na)
# 129111 probes are marked NA, which indicates
# that of the 880 zeroIntensity probes that could
# uniquely added more NA probes, 7 of them overlapped
# with the probes that were set to NA during the initial
# preprocessing for every single sample.
# Step 1 probe attrition final counts:
# initial masking of bad probes, snp probes, and non cg probes:
# 127157
# Additional masking of probes in all samples by preprocessing:
# 1081
# Additional probes set to NA because of zeroIntensity, that were
# not already marked NA by initial masking or preprocessing:
# 873.
# Total probes masked for every single person at this stage = 129111.
# x is a matrix containing the data
# method : correlation method. "pearson"" or "spearman"" is supported
# removeTriangle : remove upper or lower triangle
# results : if "html" or "latex"
# the results will be displayed in html or latex format
corstars <-function(x, method=c("pearson", "spearman"), removeTriangle=c("upper", "lower"),
result=c("none", "html", "latex")){
#Compute correlation matrix
require(Hmisc)
x <- as.matrix(x)
correlation_matrix<-rcorr(x, type=method[1])
R <- correlation_matrix$r # Matrix of correlation coeficients
p <- correlation_matrix$P # Matrix of p-value
## Define notions for significance levels; spacing is important.
mystars <- ifelse(p < .0001, "****", ifelse(p < .001, "*** ", ifelse(p < .01, "** ", ifelse(p < .05, "* ", " "))))
## trunctuate the correlation matrix to two decimal
R <- format(round(cbind(rep(-1.11, ncol(x)), R), 4))[,-1]
## build a new matrix that includes the correlations with their apropriate stars
Rnew <- matrix(paste(R, mystars, sep=""), ncol=ncol(x))
diag(Rnew) <- paste(diag(R), " ", sep="")
rownames(Rnew) <- colnames(x)
colnames(Rnew) <- paste(colnames(x), "", sep="")
## remove upper triangle of correlation matrix
if(removeTriangle[1]=="upper"){
Rnew <- as.matrix(Rnew)
Rnew[upper.tri(Rnew, diag = TRUE)] <- ""
Rnew <- as.data.frame(Rnew)
}
## remove lower triangle of correlation matrix
else if(removeTriangle[1]=="lower"){
Rnew <- as.matrix(Rnew)
Rnew[lower.tri(Rnew, diag = TRUE)] <- ""
Rnew <- as.data.frame(Rnew)
}
## remove last column and return the correlation matrix
Rnew <- cbind(Rnew[1:length(Rnew)-1])
if (result[1]=="none") return(Rnew)
else{
if(result[1]=="html") print(xtable(Rnew), type="html")
else print(xtable(Rnew), type="latex")
}
}
raw_betas <- as.data.frame(raw_betas)
# Raw replicate correlations
raw_rep1 <- raw_betas %>%
select(toAverage$C1) %>%
corstars(method = "spearman")
raw_rep2 <- raw_betas %>%
select(toAverage$C12) %>%
corstars(method = "spearman")
raw_rep3 <- raw_betas %>%
select(toAverage$C20) %>%
corstars(method = "spearman")
raw_rep4 <- raw_betas %>%
select(toAverage$C29) %>%
corstars(method = "spearman")
raw_rep5 <- raw_betas %>%
select(toAverage$C49[c(1,5)]) %>%
corstars(method = "spearman")
raw_rep6 <- raw_betas %>%
select(toAverage$C49[c(2:4)]) %>%
corstars(method = "spearman")
raw_rep7 <- raw_betas %>%
select(toAverage$C5) %>%
corstars(method = "spearman")
raw_rep8 <- raw_betas %>%
select(toAverage$C62) %>%
corstars(method = "spearman")
raw_rep9 <- raw_betas %>%
select(toAverage$C89[c(1,3)]) %>%
corstars(method = "spearman")
raw_rep10 <- raw_betas %>%
select(toAverage$C89[c(2,4)]) %>%
corstars(method = "spearman")
raw_rep11 <- raw_betas %>%
select(toAverage$C91) %>%
corstars(method = "spearman")
raw_rep12 <- raw_betas %>%
select(toAverage$SV11) %>%
corstars(method = "spearman")
raw_rep13 <- raw_betas %>%
select(toAverage$SV51) %>%
corstars(method = "spearman")
raw_rep14 <- raw_betas %>%
select(toAverage$SV61) %>%
corstars(method = "spearman")
raw_rep15 <- raw_betas %>%
select(toAverage$SV67) %>%
corstars(method = "spearman")
raw_rep16 <- raw_betas %>%
select(toAverage$SV7) %>%
corstars(method = "spearman")
raw_rep17 <- raw_betas %>%
select(toAverage$SV76) %>%
corstars(method = "spearman")
raw_reps <- c(raw_rep1,raw_rep2,raw_rep3,raw_rep4,raw_rep5,raw_rep6,raw_rep7,
raw_rep8,raw_rep9,raw_rep10,raw_rep11,raw_rep12,raw_rep13,raw_rep14,
raw_rep15,raw_rep16,raw_rep17)
raw_reps
## $C1M.1.1
## [1] "" " 0.9752****"
##
## $C012bb.1.1.
## [1] "" " 0.9665****"
##
## $C20bb.1.1_250
## [1] "" " 0.9657****"
##
## $C29M.2.1
## [1] "" " 0.9725****"
##
## $C49bb.2.1
## [1] "" " 0.9552****"
##
## $C49M.1.2
## [1] "" " 0.9406****" " 0.9366****"
##
## $C49M.1.2_clean
## [1] "" "" " 0.9740****"
##
## $C6.bb.2.1a
## [1] "" " 0.9205****"
##
## $C62M.1.1
## [1] "" " 0.9668****"
##
## $C89bb.2.1
## [1] "" " 0.9548****"
##
## $C89M2.1
## [1] "" " 0.9697****"
##
## $C91bb.1.2
## [1] "" " 0.9146****"
##
## $`SV011M.1.1 - eq100`
## [1] "" " 0.9761****" " 0.9796****"
##
## $`SV011M.1.1 - eq101`
## [1] "" "" " 0.9712****"
##
## $`SV051M.1 - eq100`
## [1] "" " 0.9849****" " 0.9818****"
##
## $`SV051M.1 - eq101`
## [1] "" "" " 0.9826****"
##
## $`SV061bb.2.1 - eq100`
## [1] "" " 0.9816****" " 0.9722****"
##
## $`SV061bb.2.1 - eq101`
## [1] "" "" " 0.9720****"
##
## $`SV067bb.1.1 - quad4`
## [1] "" " 0.9743****" " 0.9737****" " 0.9754****"
##
## $`SV067bb.1.1 - quad2 - eq100`
## [1] "" "" " 0.9748****" " 0.9762****"
##
## $SV067b.b.1.1
## [1] "" "" "" " 0.9750****"
##
## $`SV007M.1 - eq100`
## [1] "" " 0.9812****" " 0.9785****"
##
## $`SV007M.1.1 - eq101`
## [1] "" "" " 0.9802****"
##
## $`SV076M.1.1 - dup1a`
## [1] "" " 0.9562****"
raw_correlations <- c(0.9752, # C1M
0.9665, # C12bb
0.9657, # C20bb
0.9725, # C29M
0.9552, # C49bb
mean(0.9406,0.9366,0.9740), # C49M
0.9205, # C6bb
0.9668, # C62M
0.9548, # C89bb
0.9697, # C89M
0.9146, # C91bb
mean(0.9761,0.9796,0.9712), # SV11M
mean(0.9849,0.9818,0.9826), # SV51M
mean(0.9816,0.9722,0.9720), # SV61bb
mean(0.9743,0.9737,0.9754,0.9748,0.9762,0.9750), # SV67bb
mean(0.9812,0.9785,0.9802), # SV7M
0.9562) # SV76M
# SeSame Correlations
betas <- as.data.frame(betas)
sesame_rep1 <- betas %>%
select(toAverage$C1) %>%
corstars(method = "spearman")
sesame_rep2 <- betas %>%
select(toAverage$C12) %>%
corstars(method = "spearman")
sesame_rep3 <- betas %>%
select(toAverage$C20) %>%
corstars(method = "spearman")
sesame_rep4 <- betas %>%
select(toAverage$C29) %>%
corstars(method = "spearman")
sesame_rep5 <- betas %>%
select(toAverage$C49[c(1,5)]) %>%
corstars(method = "spearman")
sesame_rep6 <- betas %>%
select(toAverage$C49[c(2:4)]) %>%
corstars(method = "spearman")
sesame_rep7 <- betas %>%
select(toAverage$C5) %>%
corstars(method = "spearman")
sesame_rep8 <- betas %>%
select(toAverage$C62) %>%
corstars(method = "spearman")
sesame_rep9 <- betas %>%
select(toAverage$C89[c(1,3)]) %>%
corstars(method = "spearman")
sesame_rep10 <- betas %>%
select(toAverage$C89[c(2,4)]) %>%
corstars(method = "spearman")
sesame_rep11 <- betas %>%
select(toAverage$C91) %>%
corstars(method = "spearman")
sesame_rep12 <- betas %>%
select(toAverage$SV11) %>%
corstars(method = "spearman")
sesame_rep13 <- betas %>%
select(toAverage$SV51) %>%
corstars(method = "spearman")
sesame_rep14 <- betas %>%
select(toAverage$SV61) %>%
corstars(method = "spearman")
sesame_rep15 <- betas %>%
select(toAverage$SV67) %>%
corstars(method = "spearman")
sesame_rep16 <- betas %>%
select(toAverage$SV7) %>%
corstars(method = "spearman")
sesame_rep17 <- betas %>%
select(toAverage$SV76) %>%
corstars(method = "spearman")
sesame_reps <- c(sesame_rep1,sesame_rep2,sesame_rep3,sesame_rep4,sesame_rep5,sesame_rep6,sesame_rep7,
sesame_rep8,sesame_rep9,sesame_rep10,sesame_rep11,sesame_rep12,sesame_rep13,sesame_rep14,
sesame_rep15,sesame_rep16,sesame_rep17)
sesame_reps
## $C1M.1.1
## [1] "" " 0.9852****"
##
## $C012bb.1.1.
## [1] "" " 0.9688****"
##
## $C20bb.1.1_250
## [1] "" " 0.9817****"
##
## $C29M.2.1
## [1] "" " 0.9777****"
##
## $C49bb.2.1
## [1] "" " 0.9756****"
##
## $C49M.1.2
## [1] "" " 0.9645****" " 0.9463****"
##
## $C49M.1.2_clean
## [1] "" "" " 0.9766****"
##
## $C6.bb.2.1a
## [1] "" " 0.9197****"
##
## $C62M.1.1
## [1] "" " 0.9804****"
##
## $C89bb.2.1
## [1] "" " 0.9681****"
##
## $C89M2.1
## [1] "" " 0.9821****"
##
## $C91bb.1.2
## [1] "" " 0.9255****"
##
## $`SV011M.1.1 - eq100`
## [1] "" " 0.9871****" " 0.9879****"
##
## $`SV011M.1.1 - eq101`
## [1] "" "" " 0.9856****"
##
## $`SV051M.1 - eq100`
## [1] "" " 0.9893****" " 0.9865****"
##
## $`SV051M.1 - eq101`
## [1] "" "" " 0.9875****"
##
## $`SV061bb.2.1 - eq100`
## [1] "" " 0.9889****" " 0.9829****"
##
## $`SV061bb.2.1 - eq101`
## [1] "" "" " 0.9812****"
##
## $`SV067bb.1.1 - quad4`
## [1] "" " 0.9851****" " 0.9845****" " 0.9809****"
##
## $`SV067bb.1.1 - quad2 - eq100`
## [1] "" "" " 0.9854****" " 0.9830****"
##
## $SV067b.b.1.1
## [1] "" "" "" " 0.9829****"
##
## $`SV007M.1 - eq100`
## [1] "" " 0.9887****" " 0.9741****"
##
## $`SV007M.1.1 - eq101`
## [1] "" "" " 0.9799****"
##
## $`SV076M.1.1 - dup1a`
## [1] "" " 0.9817****"
sesame_correlations <- c(0.9852, # C1M
0.9688, # C12bb
0.9817, # C20bb
0.9777, # C29M
0.9756, # C49bb
mean(0.9645,0.9463,0.9766), # C49M
0.9197, # C6bb
0.9804, # C62M
0.9681, # C89bb
0.9821, # C89M
0.9255, # C91bb
mean(0.9871,0.9879,0.9856), # SV11M
mean(0.9893,0.9865,0.9875), # SV51M
mean(0.9889,0.9829,0.9812), # SV61bb
mean(0.9851,0.9845,0.9809,0.9854,0.9830,0.9829), # SV67bb
mean(0.9887,0.9741,0.9799), # SV7M
0.9817) # SV76M
corr_names <- c("C1M","C12bb","C20bb","C29M","C49bb","C49M","C6bb","C62M",
"C89bb","C89M","C91bb","SV11M","SV51M","SV61bb","SV67bb","SV7M","SV76M")
correlations <- cbind.data.frame(corr_names,raw_correlations,sesame_correlations)
stats <- round(apply(correlations[2:3],2,median), digits = 4)
brow <- c("Median",stats)
correlations <- rbind(correlations,brow)
colnames(correlations) <- c("Replicate","Raw","Preprocessed")
kbl(correlations,
caption = "Technical Replicate Correlations for Raw and Preprocessed Betas") %>%
kable_styling("hover", full_width = F)
| Replicate | Raw | Preprocessed |
|---|---|---|
| C1M | 0.9752 | 0.9852 |
| C12bb | 0.9665 | 0.9688 |
| C20bb | 0.9657 | 0.9817 |
| C29M | 0.9725 | 0.9777 |
| C49bb | 0.9552 | 0.9756 |
| C49M | 0.9406 | 0.9645 |
| C6bb | 0.9205 | 0.9197 |
| C62M | 0.9668 | 0.9804 |
| C89bb | 0.9548 | 0.9681 |
| C89M | 0.9697 | 0.9821 |
| C91bb | 0.9146 | 0.9255 |
| SV11M | 0.9761 | 0.9871 |
| SV51M | 0.9849 | 0.9893 |
| SV61bb | 0.9816 | 0.9889 |
| SV67bb | 0.9743 | 0.9851 |
| SV7M | 0.9812 | 0.9887 |
| SV76M | 0.9562 | 0.9817 |
| Median | 0.9668 | 0.9817 |
screeplot <- PCAtools::screeplot
pca <- PCAtools::pca
rownames(df6) <- df6$methylation_id
df_reps <- df6 %>%
filter(replicate==TRUE)
df_reps$dyad <- as.factor(df_reps$dyad)
df_reps <- as.data.frame(df_reps)
rownames(df_reps) <- df_reps$methylation_id
raw_betas_reps <- raw_betas %>%
select(rav$methylation_id)
sesame_betas_reps <- betas %>%
select(rav$methylation_id)
identical(rownames(df_reps),colnames(raw_betas_reps))
## [1] TRUE
p <- pca(na.omit(raw_betas_reps), metadata = df_reps)
q <- pca(na.omit(sesame_betas_reps), metadata = df_reps)
screeplot(p, components = getComponents(p, 1:10))
screeplot(q, components = getComponents(p, 1:10))
biplot(p, lab = p$metadata$dyad ,colby = 'dyad',
hline = 0, vline = 0,
xlim = c(-100,100),
ylim = c(-100,100),
legendPosition = 'right',
title = 'Raw Betas PCA bi-plot',
subtitle = 'PC1 versus PC2')
biplot(q,lab = p$metadata$dyad ,colby = 'dyad',
hline = 0, vline = 0,
xlim = c(-50,50),
ylim = c(-50,50),
legendPosition = 'right',
title = 'SeSame Betas PCA bi-plot',
subtitle = 'PC1 versus PC2 - note change in axis scale')
sesame_cor <- eigencorplot(q, metavars = c('Age',
'Female',
'cohort','palco','setot',
'gtsum','achronic','awar_nr'),
rotLabX = 45,
cexCorval = 0.7,
colCorval = 'white',
col = c('darkblue', 'blue2', 'black', 'red2', 'darkred'),
posColKey = 'top',
main = "PCA Correlations",
signifSymbols = c('****', '***', '**', '*', ''),
signifCutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1))
sesame_cor
# These are the samples failing the QC checks
outlier.baby
## sample.name issue
## 124 C006bb.1.2. Sex mismatch
## 34 C5bb.1.2 Sex mismatch
## 100 C6.bb.2.1a Sex mismatch
## C91bb.1.2 C91bb.1.2 Methylated vs Unmethylated
## SV001b.b.1.2 SV001b.b.1.2 Methylated vs Unmethylated
## SV001b.b.1.21 SV001b.b.1.2 Detection p-value
## 157 SV001b.b.1.2 X-Y ratio outlier
## 1571 SV001b.b.1.2 Sex mismatch
## 145 SV014b.b.1.1 Sex mismatch
outlier.mother
## sample.name issue
## C084M C084M Methylated vs Unmethylated
## C086M C086M Methylated vs Unmethylated
## SV018M.1.1 SV018M.1.1 Methylated vs Unmethylated
## 179 SV043M.2.1a Sex mismatch
# These samples fail the contamination check.
# These overlap with the results above from the
# meffil QC checks.
contam
## Sample_Name outlier
## 260 SV001b.b.1.2 -3.463359
## 293 SV018M.1.1 -3.904566
fail_qc <- c(unique(outlier.baby$sample.name),
outlier.mother$sample.name)
fail_qc
## [1] "C006bb.1.2." "C5bb.1.2" "C6.bb.2.1a" "C91bb.1.2" "SV001b.b.1.2"
## [6] "SV014b.b.1.1" "C084M" "C086M" "SV018M.1.1" "SV043M.2.1a"
# That's 10 samples to drop failing qc
Remove the samples that failed qc and the probes failing in greater than 10% of all samples after removing failed samples. Then we need to drop the batches that are contributing basically no samples and preventing ComBat from running because all probes have missing data for a subset of 8 samples, for example. After ComBat, then we can remove replicates according to which batch they belong to. This works because samples failing quality control have already been removed, and therefore it doesn’t matter which replicate we keep. Letting batch membership dictate the choice is fine.
Further sample attrition steps are described in this chunk.
These are betas preprocessed using the SeSame pipeline and then corrected for batch effects using ComBat.
if (file.exists(here("output","combat_betas.rds"))){
combat_betas <- readRDS(here("output","combat_betas.rds"))
df_combat <- df6 %>%
filter(methylation_id %in% colnames(combat_betas))
linker <- match(df_combat$methylation_id,colnames(combat_betas))
df_combat <- df_combat[linker,]
identical(df_combat$methylation_id,colnames(combat_betas))
} else {
# First remove those samples failing quality control:
betas_clean <- betas[!(colnames(betas) %in% fail_qc)]
# That takes us from 356 samples to 346 samples.
# Next, remove those samples that belong to small
# batches contributing very few samples. This prevents
# ComBat from working because we have all missing data
# for a number of these batches on many probes.
get_batch_drop <- df6 %>%
filter(plate == "A" | plate == "B" | plate == "F") %>%
select(methylation_id)
betas_clean_2 <- betas_clean[!(colnames(betas_clean) %in% get_batch_drop$methylation_id)]
# Removing the small batches results in a loss of 23 samples, taking us from
# 346 to 323
# Remove all probes that failed in more than 10% of samples:
c_betas <- betas_clean_2[rowSums(is.na(betas_clean_2)) < ncol(betas_clean_2)*0.10,]
# That takes us from 866553 to 706987.
dim(c_betas)
df_combat <- df6 %>%
filter(methylation_id %in% colnames(c_betas))
linker <- match(df_combat$methylation_id,colnames(c_betas))
df_combat <- df_combat[linker,]
identical(df_combat$methylation_id,colnames(c_betas))
# Run ComBat:
mvals <- as.matrix(BetaValueToMValue(c_betas))
# make model matrix
modcombat <- model.matrix(~1, data = df_combat)
# Run ComBat with a parametric Bayesian framework
cvals <- ComBat(
dat = mvals,
batch = df_combat$plate,
mod = modcombat,
par.prior = TRUE, # runs a parametric framework
prior.plots = FALSE, # outputs prior plots
mean.only = FALSE,
ref.batch = NULL)
combat_betas <- as.data.frame(MValueToBetaValue(cvals))
rm(cvals)
rm(mvals)
rm(betas_clean)
rm(betas_clean_2)
saveRDS(combat_betas, file = here("output","combat_betas.rds"))
}
## [1] TRUE
Check the performance of the replicates we have. It won’t be as many as with the raw and sesame checks because we have dropped failed samples and three small batches of samples.
# correlation
# combat replicate correlations
combat_rep4 <- combat_betas %>%
select(toAverage$C29) %>%
corstars(method = "spearman")
combat_rep8 <- combat_betas %>%
select(toAverage$C62) %>%
corstars(method = "spearman")
combat_rep15 <- combat_betas %>%
select(toAverage$SV67) %>%
corstars(method = "spearman")
combat_rep17 <- combat_betas %>%
select(toAverage$SV76) %>%
corstars(method = "spearman")
combat_reps <- c(combat_rep4,combat_rep8,combat_rep15,combat_rep17)
combat_reps
## $C29M.2.1
## [1] "" " 0.978****"
##
## $C62M.1.1
## [1] "" " 0.9852****"
##
## $`SV067bb.1.1 - quad4`
## [1] "" " 0.9851****" " 0.9844****" " 0.9835****"
##
## $`SV067bb.1.1 - quad2 - eq100`
## [1] "" "" " 0.9854****" " 0.9840****"
##
## $SV067b.b.1.1
## [1] "" "" "" " 0.9835****"
##
## $`SV076M.1.1 - dup1a`
## [1] "" " 0.9788****"
combat_correlations <- c(0.9780, # C29M
0.9852, # C62M
mean(0.9851,0.9844,0.9835,0.9854,0.9840,0.9835), # SV67bb
0.9788) # SV76M
# PCA biplots
rownames(df_combat) <- df_combat$methylation_id
df_reps <- df_combat %>%
mutate(id = paste0(dyad,tissue)) %>%
filter(id %in% c("C29mother_venous_blood",
"C62mother_venous_blood",
"SV67baby_venous_blood",
"SV76mother_venous_blood"))
df_reps$dyad <- as.factor(df_reps$dyad)
df_reps <- as.data.frame(df_reps)
rownames(df_reps) <- df_reps$methylation_id
combat_betas_reps <- combat_betas %>%
select(df_reps$methylation_id)
identical(rownames(df_reps),colnames(combat_betas_reps))
## [1] TRUE
r <- pca(na.omit(combat_betas_reps), metadata = df_reps)
screeplot(r, components = getComponents(p, 1:10))
biplot(r, lab = r$metadata$dyad ,colby = 'dyad',
hline = 0, vline = 0,
xlim = c(-50,50),
ylim = c(-50,50),
legendPosition = 'right',
title = 'ComBat Betas PCA bi-plot',
subtitle = 'PC1 versus PC2')
We can do this by contamination score for technical replicates.
keep_reps <- df_combat %>%
mutate(id = paste0(dyad,tissue)) %>%
group_by(id) %>%
filter(n()>1) %>%
arrange(desc(outlier)) %>% # "outlier" is the column with contamination score
summarise_all(last)
# keep_reps has the 5 I want to keep...but I actually need
# the ones I want to remove...
remove_reps <- df_combat %>%
mutate(id = paste0(dyad,tissue)) %>%
filter(id %in% keep_reps$id) %>%
filter(methylation_id %in% keep_reps$methylation_id==FALSE) %>%
select(methylation_id,outlier)
length(remove_reps$methylation_id) # 7 samples to remove prior to EWAS.
## [1] 7
# final combat betas
# Remove replicates
fbetas <- combat_betas[!(colnames(combat_betas) %in% remove_reps$methylation_id)]
# That takes us from 323 samples to 316 samples.
######################################################
# Remove siblings and samples designated for dropping
# HERE
#####################################################
fbetas <- fbetas[!(colnames(fbetas) %in% droppers)]
# That takes us from 316 samples to 302 samples.
dim(fbetas)
## [1] 706987 302
df7 <- df_combat %>%
filter(methylation_id %in% colnames(fbetas)) %>%
mutate(bmi = mwgt/((mhgt/100)^2)) %>%
dplyr::rename(parous = is_this_your_first_child)
# Get mothers dataset
dfm <- df7 %>%
filter(tissue == "mother_venous_blood")
mom_betas <- fbetas[colnames(fbetas) %in% dfm$methylation_id]
# Add in cell type proportions for mothers, and first 4-5 PCs
# of cell type
mom_cells <- read.csv(here("output","congo_mother_cell_composition_20220613.csv"))
rownames(mom_cells) <- mom_cells$IID
mom_cells <- mom_cells[-c(1:2)]
mom_pr_cells <- prcomp(as.matrix(mom_cells))
var_explained_mom = mom_pr_cells$sdev^2 / sum(mom_pr_cells$sdev^2)
cat("First PC explains",round(var_explained_mom[1]*100,digits = 2),
"percent of the variance in cell type.")
## First PC explains 89.22 percent of the variance in cell type.
qplot(c(1:6),var_explained_mom) +
geom_line() +
xlab("Principal Component") +
ylab("Variance Explained") +
ggtitle("Scree Plot for Mothers Cell Type") +
theme_light() +
ylim(0, 1)
mom_pr_cells <- as.data.frame(mom_pr_cells$x) %>%
rownames_to_column(var = "methylation_id") %>%
rename(PC1_cells = PC1,
PC2_cells = PC2,
PC3_cells = PC3,
PC4_cells = PC4,
PC5_cells = PC5,
PC6_cells = PC6)
dfm <- mom_cells %>%
rownames_to_column(var = "methylation_id") %>%
left_join(mom_pr_cells, by = c("methylation_id" = "methylation_id")) %>%
right_join(dfm, by = c("methylation_id" = "methylation_id"))
# Add in top 10 methylation PCs for all mothers
mom_meth_pcs <- prcomp(as.matrix(t(na.omit(mom_betas))))
mom_meth_pcs <- mom_meth_pcs$x[,1:10]
dfm <- as.data.frame(mom_meth_pcs) %>%
rownames_to_column(var = "methylation_id") %>%
right_join(dfm, by = c("methylation_id" = "methylation_id"))
# Order the metadata so it's the same as the column names order
mlink <- match(dfm$methylation_id, colnames(mom_betas))
dfm <- dfm[mlink,]
dfm <- as.data.frame(dfm)
rownames(dfm) <- dfm$methylation_id
identical(rownames(dfm),colnames(mom_betas)) # TRUE
## [1] TRUE
#####################
# Get babies dataset
####################
dfb <- df7 %>%
filter(tissue == "baby_venous_blood")
baby_betas <- fbetas[colnames(fbetas) %in% dfb$methylation_id]
# Add in cell type proportions for babies, and first 4-5 PCs
# of cell type
baby_cells <- read.csv(here("output","congo_baby_cell_composition_20220613.csv"))
rownames(baby_cells) <- baby_cells$IID
baby_cells <- baby_cells[-c(1:2)]
baby_pr_cells <- prcomp(as.matrix(baby_cells))
var_explained_baby = baby_pr_cells$sdev^2 / sum(baby_pr_cells$sdev^2)
cat("First PC explains",round(var_explained_baby[1]*100,digits = 2),
"percent of the variance in cell type.")
## First PC explains 62.18 percent of the variance in cell type.
cat("First two PCs explain",
round(var_explained_baby[1]*100,digits = 2) +
round(var_explained_baby[2]*100,digits = 2),
"percent of the variance in cell type.")
## First two PCs explain 90.55 percent of the variance in cell type.
qplot(c(1:7),var_explained_baby) +
geom_line() +
xlab("Principal Component") +
ylab("Variance Explained") +
ggtitle("Scree Plot for Babies Cell Type") +
theme_light() +
ylim(0, 1)
baby_pr_cells <- as.data.frame(baby_pr_cells$x) %>%
rownames_to_column(var = "methylation_id") %>%
rename(PC1_cells = PC1,
PC2_cells = PC2,
PC3_cells = PC3,
PC4_cells = PC4,
PC5_cells = PC5,
PC6_cells = PC6,
PC7_cells = PC7)
dfb <- baby_cells %>%
rownames_to_column(var = "methylation_id") %>%
left_join(baby_pr_cells, by = c("methylation_id" = "methylation_id")) %>%
right_join(dfb, by = c("methylation_id" = "methylation_id"))
# Add in top 10 methylation PCs for all babies
baby_meth_pcs <- prcomp(as.matrix(t(na.omit(baby_betas))))
baby_meth_pcs <- baby_meth_pcs$x[,1:10]
dfb <- as.data.frame(baby_meth_pcs) %>%
rownames_to_column(var = "methylation_id") %>%
right_join(dfb, by = c("methylation_id" = "methylation_id"))
# Order the metadata so it's the same as the column names order
blink <- match(dfb$methylation_id, colnames(baby_betas))
dfb <- dfb[blink,]
dfb <- as.data.frame(dfb)
rownames(dfb) <- dfb$methylation_id
identical(rownames(dfb),colnames(baby_betas)) # TRUE
## [1] TRUE
This leaves us with 151 mothers and 151 babies.
# Use this package to easily
# distinguish between the two cohorts in pairs plots.
# Mothers
ppcols1 <- c("gtsum","setot","achronic","awar_nr",
"bmi","Age","sex","pcsec","parous","ga_meth")
ppcols2 <- c("gtsum","setot","achronic","awar_nr","Neu",
"NK","Bcell","CD4T","CD8T","Mono")
# Pairs plot of some demographics and cell type
pm1 <- ggpairs(dfm, columns = ppcols1, ggplot2::aes(color=cohort)) +
theme_bw()
pm2 <- ggpairs(dfm, columns = ppcols2, ggplot2::aes(color=cohort)) +
theme_bw()
pm1
pm2
# Correlation Matrix of methylation PCs, covariates, predictors, and cell types
cordat <- dfm %>%
select(PC1:PC10,gtsum,setot,achronic,awar_nr,
bmi,Age,ga_meth,Neu,NK,
Bcell,CD4T,CD8T,Mono,bwgt)
cormat <- corstars(as.matrix(cordat))
kbl(cormat, digits = 2,
caption = "Pearson Correlations for Predictors, Covariates, and Cell Types") %>%
kable_styling("hover")
| PC1 | PC2 | PC3 | PC4 | PC5 | PC6 | PC7 | PC8 | PC9 | PC10 | gtsum | setot | achronic | awar_nr | bmi | Age | ga_meth | Neu | NK | Bcell | CD4T | CD8T | Mono | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| PC1 | |||||||||||||||||||||||
| PC2 | 0.0000 | ||||||||||||||||||||||
| PC3 | 0.0000 | 0.0000 | |||||||||||||||||||||
| PC4 | 0.0000 | 0.0000 | 0.0000 | ||||||||||||||||||||
| PC5 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | |||||||||||||||||||
| PC6 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | ||||||||||||||||||
| PC7 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | |||||||||||||||||
| PC8 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | ||||||||||||||||
| PC9 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | |||||||||||||||
| PC10 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | ||||||||||||||
| gtsum | -0.0282 | 0.0791 | -0.0355 | -0.0462 | -0.0523 | -0.1814* | 0.1798* | -0.0383 | 0.0234 | -0.1142 | |||||||||||||
| setot | 0.1254 | -0.0377 | -0.0882 | 0.0497 | -0.0593 | -0.0222 | 0.0312 | 0.1030 | 0.0018 | 0.0320 | 0.3974**** | ||||||||||||
| achronic | 0.0684 | -0.0592 | -0.1687* | 0.0189 | -0.0707 | -0.0252 | 0.0586 | 0.2042* | 0.0260 | 0.0055 | 0.2953*** | 0.4372**** | |||||||||||
| awar_nr | 0.0207 | 0.0621 | 0.0578 | -0.0683 | 0.0297 | -0.0906 | 0.0838 | 0.0640 | 0.1381 | -0.1085 | 0.5886**** | 0.3659**** | 0.2398** | ||||||||||
| bmi | -0.0077 | 0.0452 | 0.1195 | -0.0677 | -0.0721 | -0.0066 | -0.1299 | -0.2683*** | -0.1075 | -0.0085 | -0.0048 | -0.1311 | -0.2953*** | -0.0773 | |||||||||
| Age | -0.0057 | 0.0654 | -0.0059 | -0.0255 | 0.1680* | 0.0392 | -0.0303 | -0.3140**** | 0.0303 | -0.0140 | -0.0118 | -0.3045*** | -0.4504**** | 0.0039 | 0.3845**** | ||||||||
| ga_meth | -0.0594 | 0.0351 | 0.1420 | 0.0725 | 0.0224 | -0.0301 | 0.0118 | -0.0872 | -0.0100 | -0.0417 | 0.0328 | 0.0269 | -0.1167 | -0.0284 | 0.1783* | 0.2520** | |||||||
| Neu | 0.9339**** | -0.0756 | 0.1033 | 0.1012 | 0.0423 | -0.0544 | 0.0289 | 0.0266 | 0.0445 | -0.0676 | 0.0079 | 0.1406 | 0.0610 | 0.0567 | -0.0398 | -0.0227 | -0.0410 | ||||||
| NK | -0.7527**** | -0.0547 | -0.0559 | -0.0589 | 0.2326** | -0.0297 | -0.0961 | 0.0282 | 0.1675* | 0.0768 | -0.0252 | -0.1172 | -0.0832 | -0.0475 | -0.0527 | -0.0065 | -0.0398 | -0.7456**** | |||||
| Bcell | -0.6540**** | 0.3027*** | -0.2324** | -0.2193** | 0.0725 | -0.1111 | -0.0798 | -0.0592 | -0.0876 | 0.1306 | -0.0086 | -0.0617 | 0.0601 | -0.0222 | -0.0006 | -0.0612 | 0.0184 | -0.7384**** | 0.4985**** | ||||
| CD4T | -0.7989**** | 0.1696* | -0.1042 | -0.1311 | -0.2312** | 0.1313 | 0.0045 | -0.0605 | -0.1273 | -0.0036 | -0.0121 | -0.1587 | -0.0657 | -0.0426 | 0.1074 | 0.0530 | 0.0574 | -0.8534**** | 0.4594**** | 0.6195**** | |||
| CD8T | -0.8630**** | -0.0513 | -0.0199 | -0.0331 | 0.1135 | 0.1228 | -0.0207 | 0.0601 | -0.0149 | -0.0015 | -0.0369 | -0.1013 | -0.0922 | -0.0772 | -0.0604 | 0.0022 | 0.0213 | -0.8317**** | 0.6857**** | 0.5051**** | 0.5671**** | ||
| Mono | -0.3329**** | -0.0180 | -0.0621 | -0.0062 | -0.0804 | -0.2048* | 0.0196 | -0.0998 | -0.0060 | 0.1421 | 0.0924 | -0.0222 | 0.0340 | 0.0220 | 0.0942 | 0.0371 | 0.0689 | -0.5305**** | 0.3487**** | 0.3309**** | 0.2696*** | 0.2828*** | |
| bwgt | -0.0519 | 0.0275 | -0.0116 | 0.0651 | -0.0506 | -0.0783 | -0.1097 | -0.2318** | 0.1098 | -0.0360 | -0.0477 | -0.0196 | -0.2889*** | -0.1879* | 0.3984**** | 0.4469**** | 0.4500**** | -0.0665 | 0.0569 | -0.0017 | 0.0382 | 0.0673 | 0.0895 |
# Heatmap
d <- cor(cordat, use = "pairwise.complete.obs")
d <- round(d, 1)
p.mat <- cor(d[,-1])
##############
# HEATMAP
##############
cor.mtest <- function(mat, ...) {
mat <- as.matrix(mat)
n <- ncol(mat)
p.mat<- matrix(NA, n, n)
diag(p.mat) <- 0
for (i in 1:(n - 1)) {
for (j in (i + 1):n) {
tmp <- cor.test(mat[, i], mat[, j], ...)
p.mat[i, j] <- p.mat[j, i] <- tmp$p.value
}
}
colnames(p.mat) <- rownames(p.mat) <- colnames(mat)
p.mat
}
# matrix of the p-value of the correlation
p.mat <- cor.mtest(cordat)
corrplot::corrplot(d,
method="color",
#col=col(200),
type="upper",
#order="hclust",
addCoef.col = "black",
tl.col="black",
number.cex = 0.7,
tl.cex = 0.6,
tl.srt=45,
p.mat =p.mat,
sig.level = 0.5,
insig = "label_sig")
Use robust regression for this. With babies, control for infant sex, maternal BMI, alcohol, parity, maternal age, cohort, cell type PCs 1 and 2, and then add the maternal stress predictor.
mom_betas <- t(mom_betas)
identical(rownames(mom_betas),rownames(dfm))
## [1] TRUE
baby_betas <- t(baby_betas)
identical(rownames(baby_betas),rownames(dfb))
## [1] TRUE
mod_sum <- function(models){
broom::tidy(lmtest::coeftest(models, vcov. = sandwich), conf.int = TRUE)
}
mod_sum_robust <- function (models) {
dat <- tryCatch(
{
broom::tidy(lmtest::coeftest(models, vcov. = sandwich), conf.int = TRUE)
},
error = function(cond) {
return(NA)
},
warning = function(cond) {
return(NULL)
}
)
return(dat)
}
getValues <- function(x) {
coef <- sapply(x, function (x) x[[length(x[[1]]),"estimate"]])
std_error <- sapply(x, function (x) x[[length(x[[1]]),"std.error"]])
test_stat <- sapply(x, function (x) x[[length(x[[1]]),"statistic"]])
pval <- sapply(x, function (x) x[[length(x[[1]]),"p.value"]])
conf_low <- sapply(x, function (x) x[[length(x[[1]]),"conf.low"]])
conf_high <- sapply(x, function (x) x[[length(x[[1]]),"conf.high"]])
df <- cbind.data.frame(coef,std_error,test_stat,pval,conf_low,conf_high)
return(df)
}
get_Values_Direct_Robust <- function(x) {
df <- tryCatch(
{
coef <- sapply(x, function (x) x[[length(x[[1]]),"estimate"]])
std_error <- sapply(x, function (x) x[[length(x[[1]]),"std.error"]])
test_stat <- sapply(x, function (x) x[[length(x[[1]]),"statistic"]])
pval <- sapply(x, function (x) x[[length(x[[1]]),"p.value"]])
conf_low <- sapply(x, function (x) x[[length(x[[1]]),"conf.low"]])
conf_high <- sapply(x, function (x) x[[length(x[[1]]),"conf.high"]])
df <- cbind.data.frame(coef,std_error,test_stat,pval,conf_low,conf_high)
},
error = function(cond) {
return(NA)
},
warning = function(cond) {
return(NULL)
}
)
return(df)
}
if (file.exists(here("output","mom_gtsum_params1_cell_corrected.rds"))){
mom_gtsum_params1_cell_corrected <- readRDS(here("output","mom_gtsum_params1_cell_corrected.rds"))
} else {
identical(rownames(dfm),rownames(mom_betas))
# make socket and register
cl <- makeCluster(16)
registerDoParallel(cl)
# export phenotype data only to each worker
clusterExport(cl,"dfm")
clusterExport(cl, c("rlm"))
# Create an error message we can search for easily
error_message <- function() 'fail'
# Export the error_message function to all workers
clusterExport(cl,"error_message")
# # This takes 6 minutes with 92 GB RAM.
system.time(
regs_mom_gtsum_1 <- parApply(cl,mom_betas[,1:250000],2, function(x)
tryCatch(
{
all_models <- rlm(x ~ Age +
bmi +
parous +
pcsec +
palco +
cohort +
PC1_cells +
gtsum,
data = dfm)
return(all_models)
},
error = function(e){
error_message()
})))
stopCluster(cl)
cl <- makeCluster(16)
registerDoParallel(cl)
clusterExport(cl, c("tidy","mod_sum_robust","coeftest","sandwich"))
mom_gtsum_results1_cell_corrected <- parLapply(cl,regs_mom_gtsum_1, mod_sum_robust)
stopCluster(cl)
rm(regs_mom_gtsum_1)
mom_gtsum_results1_cell_corrected <- Filter(Negate(anyNA), mom_gtsum_results1_cell_corrected)
list.save(mom_gtsum_results1_cell_corrected, file = here("output","mom_gtsum_results1_cell_corrected.rds"))
mom_gtsum_params1_cell_corrected <- getValues(mom_gtsum_results1_cell_corrected)
saveRDS(mom_gtsum_params1_cell_corrected, here("output","mom_gtsum_params1_cell_corrected.rds"))
}
if (file.exists(here("output","mom_gtsum_params2_cell_corrected.rds"))){
mom_gtsum_params2_cell_corrected <- readRDS(here("output","mom_gtsum_params2_cell_corrected.rds"))
} else {
identical(rownames(dfm),rownames(mom_betas))
# make socket and register
cl <- makeCluster(16)
registerDoParallel(cl)
# export phenotype data only to each worker
clusterExport(cl,"dfm")
clusterExport(cl, c("rlm"))
# Create an error message we can search for easily
error_message <- function() 'fail'
# Export the error_message function to all workers
clusterExport(cl,"error_message")
# # This takes 6 minutes with 92 GB RAM.
system.time(
regs_mom_gtsum_2 <- parApply(cl,mom_betas[,250001:500000],2, function(x)
tryCatch(
{
all_models <- rlm(x ~ Age +
bmi +
parous +
pcsec +
palco +
cohort +
PC1_cells +
gtsum,
data = dfm)
return(all_models)
},
error = function(e){
error_message()
})))
stopCluster(cl)
cl <- makeCluster(16)
registerDoParallel(cl)
clusterExport(cl, c("tidy","mod_sum_robust","coeftest","sandwich"))
mom_gtsum_results2_cell_corrected <- parLapply(cl,regs_mom_gtsum_2, mod_sum_robust)
stopCluster(cl)
rm(regs_mom_gtsum_2)
mom_gtsum_results2_cell_corrected <- Filter(Negate(anyNA), mom_gtsum_results2_cell_corrected)
list.save(mom_gtsum_results2_cell_corrected, file = here("output","mom_gtsum_results2_cell_corrected.rds"))
mom_gtsum_params2_cell_corrected <- getValues(mom_gtsum_results2_cell_corrected)
saveRDS(mom_gtsum_params2_cell_corrected, here("output","mom_gtsum_params2_cell_corrected.rds"))
}
if (file.exists(here("output","mom_gtsum_params3_cell_corrected.rds"))){
mom_gtsum_params3_cell_corrected <- readRDS(here("output","mom_gtsum_params3_cell_corrected.rds"))
} else {
identical(rownames(dfm),rownames(mom_betas))
# make socket and register
cl <- makeCluster(16)
registerDoParallel(cl)
# export phenotype data only to each worker
clusterExport(cl,"dfm")
clusterExport(cl, c("rlm"))
# Create an error message we can search for easily
error_message <- function() 'fail'
# Export the error_message function to all workers
clusterExport(cl,"error_message")
# # This takes 6 minutes with 92 GB RAM.
system.time(
regs_mom_gtsum_3 <- parApply(cl,mom_betas[,500001:ncol(mom_betas)],2, function(x)
tryCatch(
{
all_models <- rlm(x ~ Age +
bmi +
parous +
pcsec +
palco +
cohort +
PC1_cells +
gtsum,
data = dfm)
return(all_models)
},
error = function(e){
error_message()
})))
stopCluster(cl)
cl <- makeCluster(16)
registerDoParallel(cl)
clusterExport(cl, c("tidy","mod_sum_robust","coeftest","sandwich"))
mom_gtsum_results3_cell_corrected <- parLapply(cl,regs_mom_gtsum_3, mod_sum_robust)
stopCluster(cl)
rm(regs_mom_gtsum_3)
mom_gtsum_results3_cell_corrected <- Filter(Negate(anyNA), mom_gtsum_results3_cell_corrected)
list.save(mom_gtsum_results3_cell_corrected, file = here("output","mom_gtsum_results3_cell_corrected.rds"))
mom_gtsum_params3_cell_corrected <- getValues(mom_gtsum_results3_cell_corrected)
saveRDS(mom_gtsum_params3_cell_corrected, here("output","mom_gtsum_params3_cell_corrected.rds"))
}
if (file.exists(here("output","mom_setot_params1_cell_corrected.rds"))){
mom_setot_params1_cell_corrected <- readRDS(here("output","mom_setot_params1_cell_corrected.rds"))
} else {
identical(rownames(dfm),rownames(mom_betas))
# make socket and register
cl <- makeCluster(16)
registerDoParallel(cl)
# export phenotype data only to each worker
clusterExport(cl,"dfm")
clusterExport(cl, c("rlm"))
# Create an error message we can search for easily
error_message <- function() 'fail'
# Export the error_message function to all workers
clusterExport(cl,"error_message")
# # This takes 6 minutes with 92 GB RAM.
system.time(
regs_mom_setot_1 <- parApply(cl,mom_betas[,1:250000],2, function(x)
tryCatch(
{
all_models <- rlm(x ~ Age +
bmi +
parous +
pcsec +
palco +
cohort +
PC1_cells +
setot,
data = dfm)
return(all_models)
},
error = function(e){
error_message()
})))
stopCluster(cl)
cl <- makeCluster(16)
registerDoParallel(cl)
clusterExport(cl, c("tidy","mod_sum_robust","coeftest","sandwich"))
mom_setot_results1_cell_corrected <- parLapply(cl,regs_mom_setot_1, mod_sum_robust)
stopCluster(cl)
rm(regs_mom_setot_1)
mom_setot_results1_cell_corrected <- Filter(Negate(anyNA), mom_setot_results1_cell_corrected)
list.save(mom_setot_results1_cell_corrected, file = here("output","mom_setot_results1_cell_corrected.rds"))
mom_setot_params1_cell_corrected <- getValues(mom_setot_results1_cell_corrected)
saveRDS(mom_setot_params1_cell_corrected, here("output","mom_setot_params1_cell_corrected.rds"))
}
if (file.exists(here("output","mom_setot_params2_cell_corrected.rds"))){
mom_setot_params2_cell_corrected <- readRDS(here("output","mom_setot_params2_cell_corrected.rds"))
} else {
identical(rownames(dfm),rownames(mom_betas))
# make socket and register
cl <- makeCluster(16)
registerDoParallel(cl)
# export phenotype data only to each worker
clusterExport(cl,"dfm")
clusterExport(cl, c("rlm"))
# Create an error message we can search for easily
error_message <- function() 'fail'
# Export the error_message function to all workers
clusterExport(cl,"error_message")
# # This takes 6 minutes with 92 GB RAM.
system.time(
regs_mom_setot_2 <- parApply(cl,mom_betas[,250001:500000],2, function(x)
tryCatch(
{
all_models <- rlm(x ~ Age +
bmi +
parous +
pcsec +
palco +
cohort +
PC1_cells +
setot,
data = dfm)
return(all_models)
},
error = function(e){
error_message()
})))
stopCluster(cl)
cl <- makeCluster(16)
registerDoParallel(cl)
clusterExport(cl, c("tidy","mod_sum_robust","coeftest","sandwich"))
mom_setot_results2_cell_corrected <- parLapply(cl,regs_mom_setot_2, mod_sum_robust)
stopCluster(cl)
rm(regs_mom_setot_2)
mom_setot_results2_cell_corrected <- Filter(Negate(anyNA), mom_setot_results2_cell_corrected)
list.save(mom_setot_results2_cell_corrected, file = here("output","mom_setot_results2_cell_corrected.rds"))
mom_setot_params2_cell_corrected <- getValues(mom_setot_results2_cell_corrected)
saveRDS(mom_setot_params2_cell_corrected, here("output","mom_setot_params2_cell_corrected.rds"))
}
if (file.exists(here("output","mom_setot_params3_cell_corrected.rds"))){
mom_setot_params3_cell_corrected <- readRDS(here("output","mom_setot_params3_cell_corrected.rds"))
} else {
identical(rownames(dfm),rownames(mom_betas))
# make socket and register
cl <- makeCluster(16)
registerDoParallel(cl)
# export phenotype data only to each worker
clusterExport(cl,"dfm")
clusterExport(cl, c("rlm"))
# Create an error message we can search for easily
error_message <- function() 'fail'
# Export the error_message function to all workers
clusterExport(cl,"error_message")
# # This takes 6 minutes with 92 GB RAM.
system.time(
regs_mom_setot_3 <- parApply(cl,mom_betas[,500001:ncol(mom_betas)],2, function(x)
tryCatch(
{
all_models <- rlm(x ~ Age +
bmi +
parous +
pcsec +
palco +
cohort +
PC1_cells +
setot,
data = dfm)
return(all_models)
},
error = function(e){
error_message()
})))
stopCluster(cl)
cl <- makeCluster(16)
registerDoParallel(cl)
clusterExport(cl, c("tidy","mod_sum_robust","coeftest","sandwich"))
mom_setot_results3_cell_corrected <- parLapply(cl,regs_mom_setot_3, mod_sum_robust)
stopCluster(cl)
rm(regs_mom_setot_3)
mom_setot_results3_cell_corrected <- Filter(Negate(anyNA), mom_setot_results3_cell_corrected)
list.save(mom_setot_results3_cell_corrected, file = here("output","mom_setot_results3_cell_corrected.rds"))
mom_setot_params3_cell_corrected <- getValues(mom_setot_results3_cell_corrected)
saveRDS(mom_setot_params3_cell_corrected, here("output","mom_setot_params3_cell_corrected.rds"))
}
if (file.exists(here("output","mom_achronic_params1_cell_corrected.rds"))){
mom_achronic_params1_cell_corrected <- readRDS(here("output","mom_achronic_params1_cell_corrected.rds"))
} else {
identical(rownames(dfm),rownames(mom_betas))
# make socket and register
cl <- makeCluster(16)
registerDoParallel(cl)
# export phenotype data only to each worker
clusterExport(cl,"dfm")
clusterExport(cl, c("rlm"))
# Create an error message we can search for easily
error_message <- function() 'fail'
# Export the error_message function to all workers
clusterExport(cl,"error_message")
# # This takes 6 minutes with 92 GB RAM.
system.time(
regs_mom_achronic_1 <- parApply(cl,mom_betas[,1:250000],2, function(x)
tryCatch(
{
all_models <- rlm(x ~ Age +
bmi +
parous +
pcsec +
palco +
cohort +
PC1_cells +
achronic,
data = dfm)
return(all_models)
},
error = function(e){
error_message()
})))
stopCluster(cl)
cl <- makeCluster(16)
registerDoParallel(cl)
clusterExport(cl, c("tidy","mod_sum_robust","coeftest","sandwich"))
mom_achronic_results1_cell_corrected <- parLapply(cl,regs_mom_achronic_1, mod_sum_robust)
stopCluster(cl)
rm(regs_mom_achronic_1)
mom_achronic_results1_cell_corrected <- Filter(Negate(anyNA), mom_achronic_results1_cell_corrected)
list.save(mom_achronic_results1_cell_corrected, file = here("output","mom_achronic_results1_cell_corrected.rds"))
mom_achronic_params1_cell_corrected <- getValues(mom_achronic_results1_cell_corrected)
saveRDS(mom_achronic_params1_cell_corrected, here("output","mom_achronic_params1_cell_corrected.rds"))
}
if (file.exists(here("output","mom_achronic_params2_cell_corrected.rds"))){
mom_achronic_params2_cell_corrected <- readRDS(here("output","mom_achronic_params2_cell_corrected.rds"))
} else {
identical(rownames(dfm),rownames(mom_betas))
# make socket and register
cl <- makeCluster(16)
registerDoParallel(cl)
# export phenotype data only to each worker
clusterExport(cl,"dfm")
clusterExport(cl, c("rlm"))
# Create an error message we can search for easily
error_message <- function() 'fail'
# Export the error_message function to all workers
clusterExport(cl,"error_message")
# # This takes 6 minutes with 92 GB RAM.
system.time(
regs_mom_achronic_2 <- parApply(cl,mom_betas[,250001:500000],2, function(x)
tryCatch(
{
all_models <- rlm(x ~ Age +
bmi +
parous +
pcsec +
palco +
cohort +
PC1_cells +
achronic,
data = dfm)
return(all_models)
},
error = function(e){
error_message()
})))
stopCluster(cl)
cl <- makeCluster(16)
registerDoParallel(cl)
clusterExport(cl, c("tidy","mod_sum_robust","coeftest","sandwich"))
mom_achronic_results2_cell_corrected <- parLapply(cl,regs_mom_achronic_2, mod_sum_robust)
stopCluster(cl)
rm(regs_mom_achronic_2)
mom_achronic_results2_cell_corrected <- Filter(Negate(anyNA), mom_achronic_results2_cell_corrected)
list.save(mom_achronic_results2_cell_corrected, file = here("output","mom_achronic_results2_cell_corrected.rds"))
mom_achronic_params2_cell_corrected <- getValues(mom_achronic_results2_cell_corrected)
saveRDS(mom_achronic_params2_cell_corrected, here("output","mom_achronic_params2_cell_corrected.rds"))
}
if (file.exists(here("output","mom_achronic_params3_cell_corrected.rds"))){
mom_achronic_params3_cell_corrected <- readRDS(here("output","mom_achronic_params3_cell_corrected.rds"))
} else {
identical(rownames(dfm),rownames(mom_betas))
# make socket and register
cl <- makeCluster(16)
registerDoParallel(cl)
# export phenotype data only to each worker
clusterExport(cl,"dfm")
clusterExport(cl, c("rlm"))
# Create an error message we can search for easily
error_message <- function() 'fail'
# Export the error_message function to all workers
clusterExport(cl,"error_message")
# # This takes 6 minutes with 92 GB RAM.
system.time(
regs_mom_achronic_3 <- parApply(cl,mom_betas[,500001:ncol(mom_betas)],2, function(x)
tryCatch(
{
all_models <- rlm(x ~ Age +
bmi +
parous +
pcsec +
palco +
cohort +
PC1_cells +
achronic,
data = dfm)
return(all_models)
},
error = function(e){
error_message()
})))
stopCluster(cl)
# This takes 4 minutes with 92 GB RAM.
cl <- makeCluster(16)
registerDoParallel(cl)
clusterExport(cl, c("tidy","mod_sum_robust","coeftest","sandwich"))
mom_achronic_results3_cell_corrected <- parLapply(cl,regs_mom_achronic_3, mod_sum_robust)
stopCluster(cl)
rm(regs_mom_achronic_3)
mom_achronic_results3_cell_corrected <- Filter(Negate(anyNA), mom_achronic_results3_cell_corrected)
list.save(mom_achronic_results3_cell_corrected, file = here("output","mom_achronic_results3_cell_corrected.rds"))
mom_achronic_params3_cell_corrected <- getValues(mom_achronic_results3_cell_corrected)
saveRDS(mom_achronic_params3_cell_corrected, here("output","mom_achronic_params3_cell_corrected.rds"))
}
if (file.exists(here("output","mom_awar_nr_params1_cell_corrected.rds"))){
mom_awar_nr_params1_cell_corrected <- readRDS(here("output","mom_awar_nr_params1_cell_corrected.rds"))
} else {
identical(rownames(dfm),rownames(mom_betas))
# make socket and register
cl <- makeCluster(16)
registerDoParallel(cl)
# export phenotype data only to each worker
clusterExport(cl,"dfm")
clusterExport(cl, c("rlm"))
# Create an error message we can search for easily
error_message <- function() 'fail'
# Export the error_message function to all workers
clusterExport(cl,"error_message")
# # This takes 6 minutes with 92 GB RAM.
system.time(
regs_mom_awar_nr_1 <- parApply(cl,mom_betas[,1:250000],2, function(x)
tryCatch(
{
all_models <- rlm(x ~ Age +
bmi +
parous +
pcsec +
palco +
cohort +
PC1_cells +
awar_nr,
data = dfm)
return(all_models)
},
error = function(e){
error_message()
})))
stopCluster(cl)
# This takes 4 minutes with 92 GB RAM.
cl <- makeCluster(16)
registerDoParallel(cl)
clusterExport(cl, c("tidy","mod_sum_robust","coeftest","sandwich"))
mom_awar_nr_results1_cell_corrected <- parLapply(cl,regs_mom_awar_nr_1, mod_sum_robust)
stopCluster(cl)
rm(regs_mom_awar_nr_1)
mom_awar_nr_results1_cell_corrected <- Filter(Negate(anyNA), mom_awar_nr_results1_cell_corrected)
list.save(mom_awar_nr_results1_cell_corrected, file = here("output","mom_awar_nr_results1_cell_corrected.rds"))
mom_awar_nr_params1_cell_corrected <- getValues(mom_awar_nr_results1_cell_corrected)
saveRDS(mom_awar_nr_params1_cell_corrected, here("output","mom_awar_nr_params1_cell_corrected.rds"))
}
if (file.exists(here("output","mom_awar_nr_params2_cell_corrected.rds"))){
mom_awar_nr_params2_cell_corrected <- readRDS(here("output","mom_awar_nr_params2_cell_corrected.rds"))
} else {
identical(rownames(dfm),rownames(mom_betas))
# make socket and register
cl <- makeCluster(16)
registerDoParallel(cl)
# export phenotype data only to each worker
clusterExport(cl,"dfm")
clusterExport(cl, c("rlm"))
# Create an error message we can search for easily
error_message <- function() 'fail'
# Export the error_message function to all workers
clusterExport(cl,"error_message")
# # This takes 6 minutes with 92 GB RAM.
system.time(
regs_mom_awar_nr_2 <- parApply(cl,mom_betas[,250001:500000],2, function(x)
tryCatch(
{
all_models <- rlm(x ~ Age +
bmi +
parous +
pcsec +
palco +
cohort +
PC1_cells +
awar_nr,
data = dfm)
return(all_models)
},
error = function(e){
error_message()
})))
stopCluster(cl)
# This takes 4 minutes with 92 GB RAM.
cl <- makeCluster(16)
registerDoParallel(cl)
clusterExport(cl, c("tidy","mod_sum_robust","coeftest","sandwich"))
mom_awar_nr_results2_cell_corrected <- parLapply(cl,regs_mom_awar_nr_2, mod_sum_robust)
stopCluster(cl)
rm(regs_mom_awar_nr_2)
mom_awar_nr_results2_cell_corrected <- Filter(Negate(anyNA), mom_awar_nr_results2_cell_corrected)
list.save(mom_awar_nr_results2_cell_corrected, file = here("output","mom_awar_nr_results2_cell_corrected.rds"))
mom_awar_nr_params2_cell_corrected <- getValues(mom_awar_nr_results2_cell_corrected)
saveRDS(mom_awar_nr_params2_cell_corrected, here("output","mom_awar_nr_params2_cell_corrected.rds"))
}
if (file.exists(here("output","mom_awar_nr_params3_cell_corrected.rds"))){
mom_awar_nr_params3_cell_corrected <- readRDS(here("output","mom_awar_nr_params3_cell_corrected.rds"))
} else {
identical(rownames(dfm),rownames(mom_betas))
# make socket and register
cl <- makeCluster(16)
registerDoParallel(cl)
# export phenotype data only to each worker
clusterExport(cl,"dfm")
clusterExport(cl, c("rlm"))
# Create an error message we can search for easily
error_message <- function() 'fail'
# Export the error_message function to all workers
clusterExport(cl,"error_message")
# # This takes 6 minutes with 92 GB RAM.
system.time(
regs_mom_awar_nr_3 <- parApply(cl,mom_betas[,500001:ncol(mom_betas)],2, function(x)
tryCatch(
{
all_models <- rlm(x ~ Age +
bmi +
parous +
pcsec +
palco +
cohort +
PC1_cells +
awar_nr,
data = dfm)
return(all_models)
},
error = function(e){
error_message()
})))
stopCluster(cl)
# This takes 4 minutes with 92 GB RAM.
cl <- makeCluster(16)
registerDoParallel(cl)
clusterExport(cl, c("tidy","mod_sum_robust","coeftest","sandwich"))
mom_awar_nr_results3_cell_corrected <- parLapply(cl,regs_mom_awar_nr_3, mod_sum_robust)
stopCluster(cl)
rm(regs_mom_awar_nr_3)
mom_awar_nr_results3_cell_corrected <- Filter(Negate(anyNA), mom_awar_nr_results3_cell_corrected)
list.save(mom_awar_nr_results3_cell_corrected, file = here("output","mom_awar_nr_results3_cell_corrected.rds"))
mom_awar_nr_params3_cell_corrected <- getValues(mom_awar_nr_results3_cell_corrected)
saveRDS(mom_awar_nr_params3_cell_corrected, here("output","mom_awar_nr_params3_cell_corrected.rds"))
}
if (file.exists(here("output","baby_gtsum_params1_cell_corrected.rds"))){
baby_gtsum_params1_cell_corrected <- readRDS(here("output","baby_gtsum_params1_cell_corrected.rds"))
} else {
identical(rownames(dfb),rownames(baby_betas))
# make socket and register
cl <- makeCluster(16)
registerDoParallel(cl)
# export phenotype data only to each worker
clusterExport(cl,"dfb")
clusterExport(cl, c("rlm"))
# Create an error message we can search for easily
error_message <- function() 'fail'
# Export the error_message function to all workers
clusterExport(cl,"error_message")
# # This takes 6 minutes with 92 GB RAM.
system.time(
regs_baby_gtsum_1 <- parApply(cl,baby_betas[,1:250000],2, function(x)
tryCatch(
{
all_models <- rlm(x ~ age + # lower case age is mother's age
bmi +
parous +
pcsec +
palco +
sex +
ga_meth +
cohort +
PC1_cells +
PC2_cells +
gtsum,
data = dfb)
return(all_models)
},
error = function(e){
error_message()
})))
stopCluster(cl)
# This takes 4 minutes with 92 GB RAM.
baby_gtsum_results1_cell_corrected <- lapply(regs_baby_gtsum_1, mod_sum_robust)
rm(regs_baby_gtsum_1)
baby_gtsum_results1_cell_corrected <- Filter(Negate(anyNA), baby_gtsum_results1_cell_corrected)
list.save(baby_gtsum_results1_cell_corrected, file = here("output","baby_gtsum_results1_cell_corrected.rds"))
baby_gtsum_params1_cell_corrected <- getValues(baby_gtsum_results1_cell_corrected)
saveRDS(baby_gtsum_params1_cell_corrected, here("output","baby_gtsum_params1_cell_corrected.rds"))
}
if (file.exists(here("output","baby_gtsum_params2_cell_corrected.rds"))){
baby_gtsum_params2_cell_corrected <- readRDS(here("output","baby_gtsum_params2_cell_corrected.rds"))
} else {
identical(rownames(dfb),rownames(baby_betas))
# make socket and register
cl <- makeCluster(16)
registerDoParallel(cl)
# export phenotype data only to each worker
clusterExport(cl,"dfb")
clusterExport(cl, c("rlm"))
# Create an error message we can search for easily
error_message <- function() 'fail'
# Export the error_message function to all workers
clusterExport(cl,"error_message")
# # This takes 6 minutes with 92 GB RAM.
system.time(
regs_baby_gtsum_2 <- parApply(cl,baby_betas[,250001:500000],2, function(x)
tryCatch(
{
all_models <- rlm(x ~ age + # lower case age is mother's age
bmi +
parous +
pcsec +
palco +
sex +
ga_meth +
cohort +
PC1_cells +
PC2_cells +
gtsum,
data = dfb)
return(all_models)
},
error = function(e){
error_message()
})))
stopCluster(cl)
# This takes 4 minutes with 92 GB RAM.
baby_gtsum_results2_cell_corrected <- lapply(regs_baby_gtsum_2, mod_sum_robust)
rm(regs_baby_gtsum_2)
baby_gtsum_results2_cell_corrected <- Filter(Negate(anyNA), baby_gtsum_results2_cell_corrected)
list.save(baby_gtsum_results2_cell_corrected, file = here("output","baby_gtsum_results2_cell_corrected.rds"))
baby_gtsum_params2_cell_corrected <- getValues(baby_gtsum_results2_cell_corrected)
saveRDS(baby_gtsum_params2_cell_corrected, here("output","baby_gtsum_params2_cell_corrected.rds"))
}
if (file.exists(here("output","baby_gtsum_params3_cell_corrected.rds"))){
baby_gtsum_params3_cell_corrected <- readRDS(here("output","baby_gtsum_params3_cell_corrected.rds"))
} else {
identical(rownames(dfb),rownames(baby_betas))
# make socket and register
cl <- makeCluster(16)
registerDoParallel(cl)
# export phenotype data only to each worker
clusterExport(cl,"dfb")
clusterExport(cl, c("rlm"))
# Create an error message we can search for easily
error_message <- function() 'fail'
# Export the error_message function to all workers
clusterExport(cl,"error_message")
# # This takes 6 minutes with 92 GB RAM.
system.time(
regs_baby_gtsum_3 <- parApply(cl,baby_betas[,500001:ncol(baby_betas)],2, function(x)
tryCatch(
{
all_models <- rlm(x ~ age + # lower case age is mother's age
bmi +
parous +
pcsec +
palco +
sex +
ga_meth +
cohort +
PC1_cells +
PC2_cells +
gtsum,
data = dfb)
return(all_models)
},
error = function(e){
error_message()
})))
stopCluster(cl)
# This takes 4 minutes with 92 GB RAM.
baby_gtsum_results3_cell_corrected <- lapply(regs_baby_gtsum_3, mod_sum_robust)
rm(regs_baby_gtsum_3)
baby_gtsum_results3_cell_corrected <- Filter(Negate(anyNA), baby_gtsum_results3_cell_corrected)
list.save(baby_gtsum_results3_cell_corrected, file = here("output","baby_gtsum_results3_cell_corrected.rds"))
baby_gtsum_params3_cell_corrected <- getValues(baby_gtsum_results3_cell_corrected)
saveRDS(baby_gtsum_params3_cell_corrected, here("output","baby_gtsum_params3_cell_corrected.rds"))
}
if (file.exists(here("output","baby_setot_params1_cell_corrected.rds"))){
baby_setot_params1_cell_corrected <- readRDS(here("output","baby_setot_params1_cell_corrected.rds"))
} else {
identical(rownames(dfb),rownames(baby_betas))
# make socket and register
cl <- makeCluster(16)
registerDoParallel(cl)
# export phenotype data only to each worker
clusterExport(cl,"dfb")
clusterExport(cl, c("rlm"))
# Create an error message we can search for easily
error_message <- function() 'fail'
# Export the error_message function to all workers
clusterExport(cl,"error_message")
# # This takes 6 minutes with 92 GB RAM.
system.time(
regs_baby_setot_1 <- parApply(cl,baby_betas[,1:250000],2, function(x)
tryCatch(
{
all_models <- rlm(x ~ age + # lower case age is mother's age
bmi +
parous +
pcsec +
palco +
sex +
ga_meth +
cohort +
PC1_cells +
PC2_cells +
setot,
data = dfb)
return(all_models)
},
error = function(e){
error_message()
})))
stopCluster(cl)
# This takes 4 minutes with 92 GB RAM.
baby_setot_results1_cell_corrected <- lapply(regs_baby_setot_1, mod_sum_robust)
rm(regs_baby_setot_1)
baby_setot_results1_cell_corrected <- Filter(Negate(anyNA), baby_setot_results1_cell_corrected)
list.save(baby_setot_results1_cell_corrected, file = here("output","baby_setot_results1_cell_corrected.rds"))
baby_setot_params1_cell_corrected <- getValues(baby_setot_results1_cell_corrected)
saveRDS(baby_setot_params1_cell_corrected, here("output","baby_setot_params1_cell_corrected.rds"))
}
if (file.exists(here("output","baby_setot_params2_cell_corrected.rds"))){
baby_setot_params2_cell_corrected <- readRDS(here("output","baby_setot_params2_cell_corrected.rds"))
} else {
identical(rownames(dfb),rownames(baby_betas))
# make socket and register
cl <- makeCluster(16)
registerDoParallel(cl)
# export phenotype data only to each worker
clusterExport(cl,"dfb")
clusterExport(cl, c("rlm"))
# Create an error message we can search for easily
error_message <- function() 'fail'
# Export the error_message function to all workers
clusterExport(cl,"error_message")
# # This takes 6 minutes with 92 GB RAM.
system.time(
regs_baby_setot_2 <- parApply(cl,baby_betas[,250001:500000],2, function(x)
tryCatch(
{
all_models <- rlm(x ~ age + # lower case age is mother's age
bmi +
parous +
pcsec +
palco +
sex +
ga_meth +
cohort +
PC1_cells +
PC2_cells +
setot,
data = dfb)
return(all_models)
},
error = function(e){
error_message()
})))
stopCluster(cl)
# This takes 4 minutes with 92 GB RAM.
baby_setot_results2_cell_corrected <- lapply(regs_baby_setot_2, mod_sum_robust)
rm(regs_baby_setot_2)
baby_setot_results2_cell_corrected <- Filter(Negate(anyNA), baby_setot_results2_cell_corrected)
list.save(baby_setot_results2_cell_corrected, file = here("output","baby_setot_results2_cell_corrected.rds"))
baby_setot_params2_cell_corrected <- getValues(baby_setot_results2_cell_corrected)
saveRDS(baby_setot_params2_cell_corrected, here("output","baby_setot_params2_cell_corrected.rds"))
}
if (file.exists(here("output","baby_setot_params3_cell_corrected.rds"))){
baby_setot_params3_cell_corrected <- readRDS(here("output","baby_setot_params3_cell_corrected.rds"))
} else {
identical(rownames(dfb),rownames(baby_betas))
# make socket and register
cl <- makeCluster(16)
registerDoParallel(cl)
# export phenotype data only to each worker
clusterExport(cl,"dfb")
clusterExport(cl, c("rlm"))
# Create an error message we can search for easily
error_message <- function() 'fail'
# Export the error_message function to all workers
clusterExport(cl,"error_message")
# # This takes 6 minutes with 92 GB RAM.
system.time(
regs_baby_setot_3 <- parApply(cl,baby_betas[,500001:ncol(baby_betas)],2, function(x)
tryCatch(
{
all_models <- rlm(x ~ age + # lower case age is mother's age
bmi +
parous +
pcsec +
palco +
sex +
ga_meth +
cohort +
PC1_cells +
PC2_cells +
setot,
data = dfb)
return(all_models)
},
error = function(e){
error_message()
})))
stopCluster(cl)
# This takes 4 minutes with 92 GB RAM.
baby_setot_results3_cell_corrected <- lapply(regs_baby_setot_3, mod_sum_robust)
rm(regs_baby_setot_3)
baby_setot_results3_cell_corrected <- Filter(Negate(anyNA), baby_setot_results3_cell_corrected)
list.save(baby_setot_results3_cell_corrected, file = here("output","baby_setot_results3_cell_corrected.rds"))
baby_setot_params3_cell_corrected <- getValues(baby_setot_results3_cell_corrected)
saveRDS(baby_setot_params3_cell_corrected, here("output","baby_setot_params3_cell_corrected.rds"))
}
if (file.exists(here("output","baby_achronic_params1_cell_corrected.rds"))){
baby_achronic_params1_cell_corrected <- readRDS(here("output","baby_achronic_params1_cell_corrected.rds"))
} else {
identical(rownames(dfb),rownames(baby_betas))
# make socket and register
cl <- makeCluster(16)
registerDoParallel(cl)
# export phenotype data only to each worker
clusterExport(cl,"dfb")
clusterExport(cl, c("rlm"))
# Create an error message we can search for easily
error_message <- function() 'fail'
# Export the error_message function to all workers
clusterExport(cl,"error_message")
# # This takes 6 minutes with 92 GB RAM.
system.time(
regs_baby_achronic_1 <- parApply(cl,baby_betas[,1:250000],2, function(x)
tryCatch(
{
all_models <- rlm(x ~ age + # lower case age is mother's age
bmi +
parous +
pcsec +
palco +
sex +
ga_meth +
cohort +
PC1_cells +
PC2_cells +
achronic,
data = dfb)
return(all_models)
},
error = function(e){
error_message()
})))
stopCluster(cl)
# This takes 4 minutes with 92 GB RAM.
baby_achronic_results1_cell_corrected <- lapply(regs_baby_achronic_1, mod_sum_robust)
rm(regs_baby_achronic_1)
baby_achronic_results1_cell_corrected <- Filter(Negate(anyNA), baby_achronic_results1_cell_corrected)
list.save(baby_achronic_results1_cell_corrected, file = here("output","baby_achronic_results1_cell_corrected.rds"))
baby_achronic_params1_cell_corrected <- getValues(baby_achronic_results1_cell_corrected)
saveRDS(baby_achronic_params1_cell_corrected, here("output","baby_achronic_params1_cell_corrected.rds"))
}
if (file.exists(here("output","baby_achronic_params2_cell_corrected.rds"))){
baby_achronic_params2_cell_corrected <- readRDS(here("output","baby_achronic_params2_cell_corrected.rds"))
} else {
identical(rownames(dfb),rownames(baby_betas))
# make socket and register
cl <- makeCluster(16)
registerDoParallel(cl)
# export phenotype data only to each worker
clusterExport(cl,"dfb")
clusterExport(cl, c("rlm"))
# Create an error message we can search for easily
error_message <- function() 'fail'
# Export the error_message function to all workers
clusterExport(cl,"error_message")
# # This takes 6 minutes with 92 GB RAM.
system.time(
regs_baby_achronic_2 <- parApply(cl,baby_betas[,250001:500000],2, function(x)
tryCatch(
{
all_models <- rlm(x ~ age + # lower case age is mother's age
bmi +
parous +
pcsec +
palco +
sex +
ga_meth +
cohort +
PC1_cells +
PC2_cells +
achronic,
data = dfb)
return(all_models)
},
error = function(e){
error_message()
})))
stopCluster(cl)
# This takes 4 minutes with 92 GB RAM.
baby_achronic_results2_cell_corrected <- lapply(regs_baby_achronic_2, mod_sum_robust)
rm(regs_baby_achronic_2)
baby_achronic_results2_cell_corrected <- Filter(Negate(anyNA), baby_achronic_results2_cell_corrected)
list.save(baby_achronic_results2_cell_corrected, file = here("output","baby_achronic_results2_cell_corrected.rds"))
baby_achronic_params2_cell_corrected <- getValues(baby_achronic_results2_cell_corrected)
saveRDS(baby_achronic_params2_cell_corrected, here("output","baby_achronic_params2_cell_corrected.rds"))
}
if (file.exists(here("output","baby_achronic_params3_cell_corrected.rds"))){
baby_achronic_params3_cell_corrected <- readRDS(here("output","baby_achronic_params3_cell_corrected.rds"))
} else {
identical(rownames(dfb),rownames(baby_betas))
# make socket and register
cl <- makeCluster(16)
registerDoParallel(cl)
# export phenotype data only to each worker
clusterExport(cl,"dfb")
clusterExport(cl, c("rlm"))
# Create an error message we can search for easily
error_message <- function() 'fail'
# Export the error_message function to all workers
clusterExport(cl,"error_message")
# # This takes 6 minutes with 92 GB RAM.
system.time(
regs_baby_achronic_3 <- parApply(cl,baby_betas[,500001:ncol(baby_betas)],2, function(x)
tryCatch(
{
all_models <- rlm(x ~ age + # lower case age is mother's age
bmi +
parous +
pcsec +
palco +
sex +
ga_meth +
cohort +
PC1_cells +
PC2_cells +
achronic,
data = dfb)
return(all_models)
},
error = function(e){
error_message()
})))
stopCluster(cl)
# This takes 4 minutes with 92 GB RAM.
baby_achronic_results3_cell_corrected <- lapply(regs_baby_achronic_3, mod_sum_robust)
rm(regs_baby_achronic_3)
baby_achronic_results3_cell_corrected <- Filter(Negate(anyNA), baby_achronic_results3_cell_corrected)
list.save(baby_achronic_results3_cell_corrected, file = here("output","baby_achronic_results3_cell_corrected.rds"))
baby_achronic_params3_cell_corrected <- getValues(baby_achronic_results3_cell_corrected)
saveRDS(baby_achronic_params3_cell_corrected, here("output","baby_achronic_params3_cell_corrected.rds"))
}
if (file.exists(here("output","baby_awar_nr_params1_cell_corrected.rds"))){
baby_awar_nr_params1_cell_corrected <- readRDS(here("output","baby_awar_nr_params1_cell_corrected.rds"))
} else {
identical(rownames(dfb),rownames(baby_betas))
# make socket and register
cl <- makeCluster(16)
registerDoParallel(cl)
# export phenotype data only to each worker
clusterExport(cl,"dfb")
clusterExport(cl, c("rlm"))
# Create an error message we can search for easily
error_message <- function() 'fail'
# Export the error_message function to all workers
clusterExport(cl,"error_message")
# # This takes 6 minutes with 92 GB RAM.
system.time(
regs_baby_awar_nr_1 <- parApply(cl,baby_betas[,1:250000],2, function(x)
tryCatch(
{
all_models <- rlm(x ~ age + # lower case age is mother's age
bmi +
parous +
pcsec +
palco +
sex +
ga_meth +
cohort +
PC1_cells +
PC2_cells +
awar_nr,
data = dfb)
return(all_models)
},
error = function(e){
error_message()
})))
stopCluster(cl)
# This takes 4 minutes with 92 GB RAM.
baby_awar_nr_results1_cell_corrected <- lapply(regs_baby_awar_nr_1, mod_sum_robust)
rm(regs_baby_awar_nr_1)
baby_awar_nr_results1_cell_corrected <- Filter(Negate(anyNA), baby_awar_nr_results1_cell_corrected)
list.save(baby_awar_nr_results1_cell_corrected, file = here("output","baby_awar_nr_results1_cell_corrected.rds"))
baby_awar_nr_params1_cell_corrected <- getValues(baby_awar_nr_results1_cell_corrected)
saveRDS(baby_awar_nr_params1_cell_corrected, here("output","baby_awar_nr_params1_cell_corrected.rds"))
}
if (file.exists(here("output","baby_awar_nr_params2_cell_corrected.rds"))){
baby_awar_nr_params2_cell_corrected <- readRDS(here("output","baby_awar_nr_params2_cell_corrected.rds"))
} else {
identical(rownames(dfb),rownames(baby_betas))
# make socket and register
cl <- makeCluster(16)
registerDoParallel(cl)
# export phenotype data only to each worker
clusterExport(cl,"dfb")
clusterExport(cl, c("rlm"))
# Create an error message we can search for easily
error_message <- function() 'fail'
# Export the error_message function to all workers
clusterExport(cl,"error_message")
# # This takes 6 minutes with 92 GB RAM.
system.time(
regs_baby_awar_nr_2 <- parApply(cl,baby_betas[,250001:500000],2, function(x)
tryCatch(
{
all_models <- rlm(x ~ age + # lower case age is mother's age
bmi +
parous +
pcsec +
palco +
sex +
ga_meth +
cohort +
PC1_cells +
PC2_cells +
awar_nr,
data = dfb)
return(all_models)
},
error = function(e){
error_message()
})))
stopCluster(cl)
# This takes 4 minutes with 92 GB RAM.
baby_awar_nr_results2_cell_corrected <- lapply(regs_baby_awar_nr_2, mod_sum_robust)
rm(regs_baby_awar_nr_2)
baby_awar_nr_results2_cell_corrected <- Filter(Negate(anyNA), baby_awar_nr_results2_cell_corrected)
list.save(baby_awar_nr_results2_cell_corrected, file = here("output","baby_awar_nr_results2_cell_corrected.rds"))
baby_awar_nr_params2_cell_corrected <- getValues(baby_awar_nr_results2_cell_corrected)
saveRDS(baby_awar_nr_params2_cell_corrected, here("output","baby_awar_nr_params2_cell_corrected.rds"))
}
if (file.exists(here("output","baby_awar_nr_params3_cell_corrected.rds"))){
baby_awar_nr_params3_cell_corrected <- readRDS(here("output","baby_awar_nr_params3_cell_corrected.rds"))
} else {
identical(rownames(dfb),rownames(baby_betas))
# make socket and register
cl <- makeCluster(16)
registerDoParallel(cl)
# export phenotype data only to each worker
clusterExport(cl,"dfb")
clusterExport(cl, c("rlm"))
# Create an error message we can search for easily
error_message <- function() 'fail'
# Export the error_message function to all workers
clusterExport(cl,"error_message")
# This takes 6 minutes with 92 GB RAM.
system.time(
regs_baby_awar_nr_3 <- parApply(cl,baby_betas[,500001:ncol(baby_betas)],2, function(x)
tryCatch(
{
all_models <- rlm(x ~ age + # lower case age is mother's age
bmi +
parous +
pcsec +
palco +
sex +
ga_meth +
cohort +
PC1_cells +
PC2_cells +
awar_nr,
data = dfb)
return(all_models)
},
error = function(e){
error_message()
})))
stopCluster(cl)
# This takes 4 minutes with 92 GB RAM.
baby_awar_nr_results3_cell_corrected <- lapply(regs_baby_awar_nr_3, mod_sum_robust)
rm(regs_baby_awar_nr_3)
baby_awar_nr_results3_cell_corrected <- Filter(Negate(anyNA), baby_awar_nr_results3_cell_corrected)
list.save(baby_awar_nr_results3_cell_corrected, file = here("output","baby_awar_nr_results3_cell_corrected.rds"))
baby_awar_nr_params3_cell_corrected <- getValues(baby_awar_nr_results3_cell_corrected)
saveRDS(baby_awar_nr_params3_cell_corrected, here("output","baby_awar_nr_params3_cell_corrected.rds"))
}
mom_gtsum_results <- mom_gtsum_params1_cell_corrected %>%
bind_rows(mom_gtsum_params2_cell_corrected) %>%
bind_rows(mom_gtsum_params3_cell_corrected) %>%
mutate(p_log10 = -1*log10(pval)) %>%
mutate(fdr = p.adjust(pval, method = "fdr")) %>%
rownames_to_column(var = "probe")
mom_setot_results <- mom_setot_params1_cell_corrected %>%
bind_rows(mom_setot_params2_cell_corrected) %>%
bind_rows(mom_setot_params3_cell_corrected) %>%
mutate(p_log10 = -1*log10(pval)) %>%
mutate(fdr = p.adjust(pval, method = "fdr"))%>%
rownames_to_column(var = "probe")
mom_achronic_results <- mom_achronic_params1_cell_corrected %>%
bind_rows(mom_achronic_params2_cell_corrected) %>%
bind_rows(mom_achronic_params3_cell_corrected) %>%
mutate(p_log10 = -1*log10(pval)) %>%
mutate(fdr = p.adjust(pval, method = "fdr"))%>%
rownames_to_column(var = "probe")
mom_awar_nr_results <- mom_awar_nr_params1_cell_corrected %>%
bind_rows(mom_awar_nr_params2_cell_corrected) %>%
bind_rows(mom_awar_nr_params3_cell_corrected) %>%
mutate(p_log10 = -1*log10(pval)) %>%
mutate(fdr = p.adjust(pval, method = "fdr"))%>%
rownames_to_column(var = "probe")
# head(sort(mom_gtsum_results$pval), n=10)
# head(sort(mom_setot_results$pval), n=10)
# head(sort(mom_achronic_results$pval), n=10)
# head(sort(mom_awar_nr_results$pval), n=10)
baby_gtsum_results <- baby_gtsum_params1_cell_corrected %>%
bind_rows(baby_gtsum_params2_cell_corrected) %>%
bind_rows(baby_gtsum_params3_cell_corrected) %>%
mutate(p_log10 = -1*log10(pval)) %>%
mutate(fdr = p.adjust(pval, method = "fdr"))%>%
rownames_to_column(var = "probe")
baby_setot_results <- baby_setot_params1_cell_corrected %>%
bind_rows(baby_setot_params2_cell_corrected) %>%
bind_rows(baby_setot_params3_cell_corrected) %>%
mutate(p_log10 = -1*log10(pval)) %>%
mutate(fdr = p.adjust(pval, method = "fdr"))%>%
rownames_to_column(var = "probe")
baby_achronic_results <- baby_achronic_params1_cell_corrected %>%
bind_rows(baby_achronic_params2_cell_corrected) %>%
bind_rows(baby_achronic_params3_cell_corrected) %>%
mutate(p_log10 = -1*log10(pval)) %>%
mutate(fdr = p.adjust(pval, method = "fdr"))%>%
rownames_to_column(var = "probe")
baby_awar_nr_results <- baby_awar_nr_params1_cell_corrected %>%
bind_rows(baby_awar_nr_params2_cell_corrected) %>%
bind_rows(baby_awar_nr_params3_cell_corrected) %>%
mutate(p_log10 = -1*log10(pval)) %>%
mutate(fdr = p.adjust(pval, method = "fdr"))%>%
rownames_to_column(var = "probe")
# head(sort(baby_gtsum_results$pval), n=10)
# head(sort(baby_setot_results$pval), n=10)
# head(sort(baby_achronic_results$pval), n=10)
# head(sort(baby_awar_nr_results$pval), n=10)
# make Manhattan plots for all eight analyses.
# gtsum mothers.
# One model failed so there are 706986 probes here.
mom_gtsum_man <- zhou %>%
select(probeID,CpG_chrm,CpG_end,probeType) %>%
rename(probe = probeID, CHR = CpG_chrm, BP = CpG_end) %>%
right_join(mom_gtsum_results, by = c("probe" = "probe")) %>%
filter(probeType == "cg") %>%
mutate(chrom = str_remove(CHR,"chr")) %>%
arrange(chrom) %>%
filter(chrom != "Y") %>% # This removes 6 probes. 706980 total.
mutate(chrom2 = factor(chrom,
ordered = TRUE,
levels = c("1","2","3","4","5","6","7",
"8","9","10","11","12","13",
"14","15","16","17","18","19",
"20","21","22","X")))
mom_gtsum_man2 <- mom_gtsum_man %>%
# Compute chromosome size
group_by(chrom2) %>%
summarise(chr_len=max(BP)) %>%
# Calculate cumulative position of each chromosome
mutate(tot= cumsum(as.numeric(chr_len))-as.numeric(chr_len)) %>%
select(-chr_len) %>%
# Add this info to the initial dataset
left_join(mom_gtsum_man,., by= c("chrom2" = "chrom2")) %>%
# Add a cumulative position of each SNP
arrange(chrom2, BP) %>%
mutate(BPcum=BP+tot)
axisdf <- mom_gtsum_man2 %>% group_by(chrom2) %>% dplyr::summarize(center= (max(BPcum) + min(BPcum) ) / 2 )
mom_gtsum_man <- ggplot(mom_gtsum_man2, aes(x=BPcum, y=-log10(pval))) +
# Show all points
geom_point(aes(color=chrom2), alpha=0.7, size=1.1) +
geom_hline(yintercept = -log10(0.05/nrow(mom_gtsum_man2)), color = "black",
lty = "dashed") +
# geom_label_repel(aes(x= BPcum, y= p_log10, label = probe),
# data = mom_gtsum_man2[mom_gtsum_man2$p_log10 >
# -log10(0.05/nrow(mom_gtsum_man2)), ],
# box.padding = 0.6) +
scale_color_manual(values = rep(c("purple", "green3"), 23 )) +
# custom X axis:
scale_x_continuous( label = axisdf$chrom2, breaks= axisdf$center) +
xlab("Chromosome") +
# remove space between plot area and x axis
scale_y_continuous(expand = c(0, 0), labels = seq(0,12,2), breaks = seq(0,12,2),
limits = c(0,12)) +
ylab("-log10 p value") +
# Custom the theme:
theme_bw() +
theme(
legend.position="none",
panel.border = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank()
) +
ggtitle("General trauma in mothers") +
theme(plot.title = element_text(hjust = 0.5)) +
guides(x = guide_axis(angle = 90)) +
theme(axis.text.x = element_text(size = 6))
# setot mothers
# one model failed here. 706986 probes.
mom_setot_man <- zhou %>%
select(probeID,CpG_chrm,CpG_end,probeType) %>%
rename(probe = probeID, CHR = CpG_chrm, BP = CpG_end) %>%
right_join(mom_setot_results, by = c("probe" = "probe")) %>%
filter(probeType=="cg") %>%
mutate(chrom = str_remove(CHR,"chr")) %>%
arrange(chrom) %>%
filter(chrom != "Y") %>% # This removes 6 y chromosome probes. 706980 probes left.
mutate(chrom2 = factor(chrom,
ordered = TRUE,
levels = c("1","2","3","4","5","6","7",
"8","9","10","11","12","13",
"14","15","16","17","18","19",
"20","21","22","X")))
mom_setot_man2 <- mom_setot_man %>%
# Compute chromosome size
group_by(chrom2) %>%
summarise(chr_len=max(BP)) %>%
# Calculate cumulative position of each chromosome
mutate(tot= cumsum(as.numeric(chr_len))-as.numeric(chr_len)) %>%
select(-chr_len) %>%
# Add this info to the initial dataset
left_join(mom_setot_man,., by= c("chrom2" = "chrom2")) %>%
# Add a cumulative position of each SNP
arrange(chrom2, BP) %>%
mutate(BPcum=BP+tot)
axisdf <- mom_setot_man2 %>% group_by(chrom2) %>% dplyr::summarize(center= (max(BPcum) + min(BPcum) ) / 2 )
mom_setot_man <- ggplot(mom_setot_man2, aes(x=BPcum, y=-log10(pval))) +
# Show all points
geom_point(aes(color=chrom2), alpha=0.7, size=1.1) +
geom_hline(yintercept = -log10(0.05/nrow(mom_setot_man2)), color = "black",
lty = "dashed") +
# geom_label_repel(aes(x= BPcum, y= p_log10, label = probe),
# data = mom_setot_man2[mom_setot_man2$p_log10 >
# -log10(0.05/nrow(mom_setot_man2)), ],
# box.padding = 0.6, max.overlaps = Inf) +
scale_color_manual(values = rep(c("purple", "green3"), 23 )) +
# custom X axis:
scale_x_continuous( label = axisdf$chrom2, breaks= axisdf$center) +
xlab("Chromosome") +
# remove space between plot area and x axis
scale_y_continuous(expand = c(0, 0), labels = seq(0,12,2), breaks = seq(0,12,2),
limits = c(0,12)) +
ylab("-log10 p value") +
# Custom the theme:
theme_bw() +
theme(
legend.position="none",
panel.border = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank()
) +
ggtitle("Sexual trauma in mothers") +
theme(plot.title = element_text(hjust = 0.5)) +
guides(x = guide_axis(angle = 90)) +
theme(axis.text.x = element_text(size = 6))
# awar_nr mothers
# Total probes is 706987.
mom_awar_nr_man <- zhou %>%
select(probeID,CpG_chrm,CpG_end,probeType) %>%
rename(probe = probeID, CHR = CpG_chrm, BP = CpG_end) %>%
right_join(mom_awar_nr_results, by = c("probe" = "probe")) %>%
filter(probeType=="cg") %>%
mutate(chrom = str_remove(CHR,"chr")) %>%
arrange(chrom) %>%
filter(chrom != "Y") %>% # This removes six more probes. 706981.
mutate(chrom2 = factor(chrom,
ordered = TRUE,
levels = c("1","2","3","4","5","6","7",
"8","9","10","11","12","13",
"14","15","16","17","18","19",
"20","21","22","X")))
mom_awar_nr_man2 <- mom_awar_nr_man %>%
# Compute chromosome size
group_by(chrom2) %>%
summarise(chr_len=max(BP)) %>%
# Calculate cumulative position of each chromosome
mutate(tot= cumsum(as.numeric(chr_len))-as.numeric(chr_len)) %>%
select(-chr_len) %>%
# Add this info to the initial dataset
left_join(mom_awar_nr_man,., by= c("chrom2" = "chrom2")) %>%
# Add a cumulative position of each SNP
arrange(chrom2, BP) %>%
mutate(BPcum=BP+tot)
axisdf <- mom_awar_nr_man2 %>% group_by(chrom2) %>% dplyr::summarize(center= (max(BPcum) + min(BPcum) ) / 2 )
mom_awar_nr_man <- ggplot(mom_awar_nr_man2, aes(x=BPcum, y=-log10(pval))) +
# Show all points
geom_point(aes(color=chrom2), alpha=0.7, size=1.1) +
geom_hline(yintercept = -log10(0.05/nrow(mom_awar_nr_man2)), color = "black",
lty = "dashed") +
# geom_label_repel(aes(x= BPcum, y= p_log10, label = probe),
# data = mom_awar_nr_man2[mom_awar_nr_man2$p_log10 >
# -log10(0.05/nrow(mom_awar_nr_man2)), ],
# box.padding = 0.6) +
scale_color_manual(values = rep(c("purple", "green3"), 23 )) +
# custom X axis:
scale_x_continuous( label = axisdf$chrom2, breaks= axisdf$center) +
xlab("Chromosome") +
# remove space between plot area and x axis
scale_y_continuous(expand = c(0, 0), labels = seq(0,12,2), breaks = seq(0,12,2),
limits = c(0,12)) +
ylab("-log10 p value") +
# Custom the theme:
theme_bw() +
theme(
legend.position="none",
panel.border = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank()
) +
ggtitle("War trauma in mothers") +
theme(plot.title = element_text(hjust = 0.5)) +
guides(x = guide_axis(angle = 90)) +
theme(axis.text.x = element_text(size = 6))
# achronic mothers
# one probe failed. 706986 probes total.
mom_achronic_man <- zhou %>%
select(probeID,CpG_chrm,CpG_end,probeType) %>%
rename(probe = probeID, CHR = CpG_chrm, BP = CpG_end) %>%
right_join(mom_achronic_results, by = c("probe" = "probe")) %>%
filter(probeType=="cg") %>%
mutate(chrom = str_remove(CHR,"chr")) %>%
arrange(chrom) %>%
filter(chrom != "Y") %>% # This removes 6 probes. 706980 probes total.
mutate(chrom2 = factor(chrom,
ordered = TRUE,
levels = c("1","2","3","4","5","6","7",
"8","9","10","11","12","13",
"14","15","16","17","18","19",
"20","21","22","X")))
mom_achronic_man2 <- mom_achronic_man %>%
# Compute chromosome size
group_by(chrom2) %>%
summarise(chr_len=max(BP)) %>%
# Calculate cumulative position of each chromosome
mutate(tot= cumsum(as.numeric(chr_len))-as.numeric(chr_len)) %>%
select(-chr_len) %>%
# Add this info to the initial dataset
left_join(mom_achronic_man,., by= c("chrom2" = "chrom2")) %>%
# Add a cumulative position of each SNP
arrange(chrom2, BP) %>%
mutate(BPcum=BP+tot)
axisdf <- mom_achronic_man2 %>% group_by(chrom2) %>% dplyr::summarize(center= (max(BPcum) + min(BPcum) ) / 2 )
mom_achronic_man <- ggplot(mom_achronic_man2, aes(x=BPcum, y=-log10(pval))) +
# Show all points
geom_point(aes(color=chrom2), alpha=0.7, size=1.1) +
geom_hline(yintercept = -log10(0.05/nrow(mom_achronic_man2)), color = "black",
lty = "dashed") +
# geom_label_repel(aes(x= BPcum, y= p_log10, label = probe),
# data = mom_achronic_man2[mom_achronic_man2$p_log10 >
# -log10(0.05/nrow(mom_achronic_man2)), ],
# box.padding = 0.6) +
scale_color_manual(values = rep(c("purple", "green3"), 23 )) +
# custom X axis:
scale_x_continuous( label = axisdf$chrom2, breaks= axisdf$center) +
xlab("Chromosome") +
# remove space between plot area and x axis
scale_y_continuous(expand = c(0, 0), labels = seq(0,12,2), breaks = seq(0,12,2),
limits = c(0,12)) +
ylab("-log10 p value") +
# Custom the theme:
theme_bw() +
theme(
legend.position="none",
panel.border = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank()
) +
ggtitle("Chronic stress in mothers") +
theme(plot.title = element_text(hjust = 0.5)) +
guides(x = guide_axis(angle = 90)) +
theme(axis.text.x = element_text(size = 6))
##########################
# Baby Manhattan Plots
#########################
# gtsum babies
# Total probes are 706987.
baby_gtsum_man <- zhou %>%
select(probeID,CpG_chrm,CpG_end,probeType) %>%
rename(probe = probeID, CHR = CpG_chrm, BP = CpG_end) %>%
right_join(baby_gtsum_results, by = c("probe" = "probe")) %>%
filter(probeType == "cg") %>%
mutate(chrom = str_remove(CHR,"chr")) %>%
arrange(chrom) %>%
filter(chrom != "Y") %>% # This removes 6 probes
filter(chrom != "X") %>% # This removes 15114 probes. Total left is 691867.
mutate(chrom2 = factor(chrom,
ordered = TRUE,
levels = c("1","2","3","4","5","6","7",
"8","9","10","11","12","13",
"14","15","16","17","18","19",
"20","21","22")))
baby_gtsum_man2 <- baby_gtsum_man %>%
# Compute chromosome size
group_by(chrom2) %>%
summarise(chr_len=max(BP)) %>%
# Calculate cumulative position of each chromosome
mutate(tot= cumsum(as.numeric(chr_len))-as.numeric(chr_len)) %>%
select(-chr_len) %>%
# Add this info to the initial dataset
left_join(baby_gtsum_man,., by= c("chrom2" = "chrom2")) %>%
# Add a cumulative position of each SNP
arrange(chrom2, BP) %>%
mutate(BPcum=BP+tot)
axisdf <- baby_gtsum_man2 %>% group_by(chrom2) %>% dplyr::summarize(center= (max(BPcum) + min(BPcum) ) / 2 )
baby_gtsum_man <- ggplot(baby_gtsum_man2, aes(x=BPcum, y=-log10(pval))) +
# Show all points
geom_point(aes(color=chrom2), alpha=0.7, size=1.1) +
geom_hline(yintercept = -log10(0.05/nrow(baby_gtsum_man2)), color = "black",
lty = "dashed") +
# geom_label_repel(aes(x= BPcum, y= p_log10, label = probe),
# data = baby_gtsum_man2[baby_gtsum_man2$p_log10 >
# -log10(0.05/nrow(baby_gtsum_man2)), ],
# box.padding = 0.6) +
scale_color_manual(values = rep(c("purple", "green3"), 22 )) +
# custom X axis:
scale_x_continuous( label = axisdf$chrom2, breaks= axisdf$center) +
xlab("Chromosome") +
# remove space between plot area and x axis
scale_y_continuous(expand = c(0, 0), labels = seq(0,12,2), breaks = seq(0,12,2),
limits = c(0,12)) +
ylab("-log10 p value") +
# Custom the theme:
theme_bw() +
theme(
legend.position="none",
panel.border = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank()
) +
ggtitle("General trauma in newborns") +
theme(plot.title = element_text(hjust = 0.5)) +
guides(x = guide_axis(angle = 90)) +
theme(axis.text.x = element_text(size = 6))
# setot babies
baby_setot_man <- zhou %>%
select(probeID,CpG_chrm,CpG_end,probeType) %>%
rename(probe = probeID, CHR = CpG_chrm, BP = CpG_end) %>%
right_join(baby_setot_results, by = c("probe" = "probe")) %>%
filter(probeType=="cg") %>%
mutate(chrom = str_remove(CHR,"chr")) %>%
arrange(chrom) %>%
filter(chrom != "Y") %>% # 6 probes removed
filter(chrom != "X") %>% # 15114 probes removed. 691867 probes total left.
mutate(chrom2 = factor(chrom,
ordered = TRUE,
levels = c("1","2","3","4","5","6","7",
"8","9","10","11","12","13",
"14","15","16","17","18","19",
"20","21","22")))
baby_setot_man2 <- baby_setot_man %>%
# Compute chromosome size
group_by(chrom2) %>%
summarise(chr_len=max(BP)) %>%
# Calculate cumulative position of each chromosome
mutate(tot= cumsum(as.numeric(chr_len))-as.numeric(chr_len)) %>%
select(-chr_len) %>%
# Add this info to the initial dataset
left_join(baby_setot_man,., by= c("chrom2" = "chrom2")) %>%
# Add a cumulative position of each SNP
arrange(chrom2, BP) %>%
mutate(BPcum=BP+tot)
axisdf <- baby_setot_man2 %>% group_by(chrom2) %>% dplyr::summarize(center= (max(BPcum) + min(BPcum) ) / 2 )
baby_setot_man <- ggplot(baby_setot_man2, aes(x=BPcum, y=-log10(pval))) +
# Show all points
geom_point(aes(color=chrom2), alpha=0.7, size=1.1) +
geom_hline(yintercept = -log10(0.05/nrow(baby_setot_man2)), color = "black",
lty = "dashed") +
# geom_label_repel(aes(x= BPcum, y= p_log10, label = probe),
# data = baby_setot_man2[baby_setot_man2$p_log10 >
# -log10(0.05/nrow(baby_setot_man2)), ],
# box.padding = 0.6, max.overlaps = Inf) +
scale_color_manual(values = rep(c("purple", "green3"), 22 )) +
# custom X axis:
scale_x_continuous( label = axisdf$chrom2, breaks= axisdf$center) +
xlab("Chromosome") +
# remove space between plot area and x axis
scale_y_continuous(expand = c(0, 0), labels = seq(0,12,2), breaks = seq(0,12,2),
limits = c(0,12)) +
ylab("-log10 p value") +
# Custom the theme:
theme_bw() +
theme(
legend.position="none",
panel.border = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank()
) +
ggtitle("Sexual trauma in newborns") +
theme(plot.title = element_text(hjust = 0.5)) +
guides(x = guide_axis(angle = 90)) +
theme(axis.text.x = element_text(size = 6))
# awar_nr babies
# 706987 probes total.
baby_awar_nr_man <- zhou %>%
select(probeID,CpG_chrm,CpG_end,probeType) %>%
rename(probe = probeID, CHR = CpG_chrm, BP = CpG_end) %>%
right_join(baby_awar_nr_results, by = c("probe" = "probe")) %>%
filter(probeType=="cg") %>%
mutate(chrom = str_remove(CHR,"chr")) %>%
arrange(chrom) %>%
filter(chrom != "Y") %>% # 6 probes removed
filter(chrom != "X") %>% # 15114 probes removed. Leaves 691867 probes.
mutate(chrom2 = factor(chrom,
ordered = TRUE,
levels = c("1","2","3","4","5","6","7",
"8","9","10","11","12","13",
"14","15","16","17","18","19",
"20","21","22")))
baby_awar_nr_man2 <- baby_awar_nr_man %>%
# Compute chromosome size
group_by(chrom2) %>%
summarise(chr_len=max(BP)) %>%
# Calculate cumulative position of each chromosome
mutate(tot= cumsum(as.numeric(chr_len))-as.numeric(chr_len)) %>%
select(-chr_len) %>%
# Add this info to the initial dataset
left_join(baby_awar_nr_man,., by= c("chrom2" = "chrom2")) %>%
# Add a cumulative position of each SNP
arrange(chrom2, BP) %>%
mutate(BPcum=BP+tot)
axisdf <- baby_awar_nr_man2 %>% group_by(chrom2) %>% dplyr::summarize(center= (max(BPcum) + min(BPcum) ) / 2 )
baby_awar_nr_man <- ggplot(baby_awar_nr_man2, aes(x=BPcum, y=-log10(pval))) +
# Show all points
geom_point(aes(color=chrom2), alpha=0.7, size=1.1) +
geom_hline(yintercept = -log10(0.05/nrow(baby_awar_nr_man2)), color = "black",
lty = "dashed") +
# geom_label_repel(aes(x= BPcum, y= p_log10, label = probe),
# data = baby_awar_nr_man2[baby_awar_nr_man2$p_log10 >
# -log10(0.05/nrow(baby_awar_nr_man2)), ],
# box.padding = 0.6) +
scale_color_manual(values = rep(c("purple", "green3"), 22 )) +
# custom X axis:
scale_x_continuous( label = axisdf$chrom2, breaks= axisdf$center) +
xlab("Chromosome") +
# remove space between plot area and x axis
scale_y_continuous(expand = c(0, 0), labels = seq(0,12,2), breaks = seq(0,12,2),
limits = c(0,12)) +
ylab("-log10 p value") +
# Custom the theme:
theme_bw() +
theme(
legend.position="none",
panel.border = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank()
) +
ggtitle("War trauma in newborns") +
theme(plot.title = element_text(hjust = 0.5)) +
guides(x = guide_axis(angle = 90)) +
theme(axis.text.x = element_text(size = 6))
# achronic babies
# 706987 probes total.
baby_achronic_man <- zhou %>%
select(probeID,CpG_chrm,CpG_end,probeType) %>%
rename(probe = probeID, CHR = CpG_chrm, BP = CpG_end) %>%
right_join(baby_achronic_results, by = c("probe" = "probe")) %>%
filter(probeType=="cg") %>%
mutate(chrom = str_remove(CHR,"chr")) %>%
arrange(chrom) %>%
filter(chrom != "Y") %>% # removes 6 probes
filter(chrom != "X") %>% # removes 15114 probes. That leaves 691867 probes.
mutate(chrom2 = factor(chrom,
ordered = TRUE,
levels = c("1","2","3","4","5","6","7",
"8","9","10","11","12","13",
"14","15","16","17","18","19",
"20","21","22")))
baby_achronic_man2 <- baby_achronic_man %>%
# Compute chromosome size
group_by(chrom2) %>%
summarise(chr_len=max(BP)) %>%
# Calculate cumulative position of each chromosome
mutate(tot= cumsum(as.numeric(chr_len))-as.numeric(chr_len)) %>%
select(-chr_len) %>%
# Add this info to the initial dataset
left_join(baby_achronic_man,., by= c("chrom2" = "chrom2")) %>%
# Add a cumulative position of each SNP
arrange(chrom2, BP) %>%
mutate(BPcum=BP+tot)
axisdf <- baby_achronic_man2 %>% group_by(chrom2) %>% dplyr::summarize(center= (max(BPcum) + min(BPcum) ) / 2 )
baby_achronic_man <- ggplot(baby_achronic_man2, aes(x=BPcum, y=-log10(pval))) +
# Show all points
geom_point(aes(color=chrom2), alpha=0.7, size=1.1) +
geom_hline(yintercept = -log10(0.05/nrow(baby_achronic_man2)), color = "black",
lty = "dashed") +
# geom_label_repel(aes(x= BPcum, y= p_log10, label = probe),
# data = baby_achronic_man2[baby_achronic_man2$p_log10 >
# -log10(0.05/nrow(baby_achronic_man2)), ],
# box.padding = 0.6) +
scale_color_manual(values = rep(c("purple", "green3"), 22 )) +
# custom X axis:
scale_x_continuous( label = axisdf$chrom2, breaks= axisdf$center) +
xlab("Chromosome") +
# remove space between plot area and x axis
scale_y_continuous(expand = c(0, 0), labels = seq(0,12,2), breaks = seq(0,12,2),
limits = c(0,12)) +
ylab("-log10 p value") +
# Custom the theme:
theme_bw() +
theme(
legend.position="none",
panel.border = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank()) +
ggtitle("Chronic stress in newborns") +
theme(plot.title = element_text(hjust = 0.5)) +
guides(x = guide_axis(angle = 90)) +
theme(axis.text.x = element_text(size = 6))
###########################################################
# volcano plots for all eight analyses
mom_gtsum_volcano <- ggplot(mom_gtsum_man2,aes(x= coef, y= p_log10, label = probe)) +
theme_bw() +
geom_point(alpha = 0.6, shape = 20,
color = ifelse(mom_gtsum_man2$p_log10 > -log10(0.05/nrow(mom_gtsum_man2)),
'green4','black')) +
geom_hline(yintercept= -log10(0.05/nrow(mom_gtsum_man2)), col="red") +
geom_label_repel(aes(x= coef, y= p_log10, label = probe),
data = mom_gtsum_man2[mom_gtsum_man2$p_log10 >
-log10(0.05/nrow(mom_gtsum_man2)), ],
box.padding = 0.6) +
ggtitle("Congo Mothers General Trauma EWAS") +
theme(plot.title = element_text(hjust = 0.5)) +
xlim(-0.1,0.1) +
ylim(0,15) +
xlab("Effect of general trauma on DNAm at a given probe") +
ylab("-log10 p value")
mom_setot_volcano <- ggplot(mom_setot_man2,aes(x= coef, y= p_log10, label = probe)) +
theme_bw() +
geom_point(alpha = 0.6, shape = 20,
color = ifelse(mom_setot_man2$p_log10 > -log10(0.05/nrow(mom_setot_man2)),
'green4','black')) +
geom_hline(yintercept= -log10(0.05/nrow(mom_setot_man2)), col="red") +
geom_label_repel(aes(x= coef, y= p_log10, label = probe),
data = mom_setot_man2[mom_setot_man2$p_log10 >
-log10(0.05/nrow(mom_setot_man2)), ],
box.padding = 0.6,
max.overlaps = Inf) +
ggtitle("Congo Mothers Sexual Events EWAS") +
theme(plot.title = element_text(hjust = 0.5)) +
xlim(-0.1,0.1) +
ylim(0,15) +
xlab("Effect of sexual trauma on DNAm at a given probe") +
ylab("-log10 p value")
mom_achronic_volcano <- ggplot(mom_achronic_man2,aes(x= coef, y= p_log10, label = probe)) +
theme_bw() +
geom_point(alpha = 0.6, shape = 20,
color = ifelse(mom_achronic_man2$p_log10 > -log10(0.05/nrow(mom_achronic_man2)),
'green4','black')) +
geom_hline(yintercept= -log10(0.05/nrow(mom_achronic_man2)), col="red") +
geom_label_repel(aes(x= coef, y= p_log10, label = probe),
data = mom_achronic_man2[mom_achronic_man2$p_log10 >
-log10(0.05/nrow(mom_achronic_man2)), ],
box.padding = 0.6,
max.overlaps = Inf) +
ggtitle("Congo Mothers Chronic Stress EWAS") +
theme(plot.title = element_text(hjust = 0.5)) +
xlim(-0.1,0.1) +
ylim(0,15) +
xlab("Effect of chronic stress on DNAm at a given probe") +
ylab("-log10 p value")
mom_awar_nr_volcano <- ggplot(mom_awar_nr_man2,aes(x= coef, y= p_log10, label = probe)) +
theme_bw() +
geom_point(alpha = 0.6, shape = 20,
color = ifelse(mom_awar_nr_man2$p_log10 > -log10(0.05/nrow(mom_awar_nr_man2)),
'green4','black')) +
geom_hline(yintercept= -log10(0.05/nrow(mom_awar_nr_man2)), col="red") +
geom_label_repel(aes(x= coef, y= p_log10, label = probe),
data = mom_awar_nr_man2[mom_awar_nr_man2$p_log10 >
-log10(0.05/nrow(mom_awar_nr_man2)), ],
box.padding = 0.6,
max.overlaps = Inf) +
ggtitle("Congo Mothers War Stress EWAS") +
theme(plot.title = element_text(hjust = 0.5)) +
xlim(-0.1,0.1) +
ylim(0,15) +
xlab("Effect of war stress on DNAm at a given probe") +
ylab("-log10 p value")
baby_gtsum_volcano <- ggplot(baby_gtsum_man2,aes(x= coef, y= p_log10, label = probe)) +
theme_bw() +
geom_point(alpha = 0.6, shape = 20,
color = ifelse(baby_gtsum_man2$p_log10 > -log10(0.05/nrow(baby_gtsum_man2)),
'green4','black')) +
geom_hline(yintercept= -log10(0.05/nrow(baby_gtsum_man2)), col="red") +
geom_label_repel(aes(x= coef, y= p_log10, label = probe),
data = baby_gtsum_man2[baby_gtsum_man2$p_log10 >
-log10(0.05/nrow(baby_gtsum_man2)), ],
box.padding = 0.6) +
ggtitle("Congo Babies General Trauma EWAS") +
theme(plot.title = element_text(hjust = 0.5)) +
xlim(-0.1,0.1) +
ylim(0,15) +
xlab("Effect of general trauma on DNAm at a given probe") +
ylab("-log10 p value")
baby_setot_volcano <- ggplot(baby_setot_man2,aes(x= coef, y= p_log10, label = probe)) +
theme_bw() +
geom_point(alpha = 0.6, shape = 20,
color = ifelse(baby_setot_man2$p_log10 > -log10(0.05/nrow(baby_setot_man2)),
'green4','black')) +
geom_hline(yintercept= -log10(0.05/nrow(baby_setot_man2)), col="red") +
geom_label_repel(aes(x= coef, y= p_log10, label = probe),
data = baby_setot_man2[baby_setot_man2$p_log10 >
-log10(0.05/nrow(baby_setot_man2)), ],
box.padding = 0.6,
max.overlaps = Inf) +
ggtitle("Congo Babies Sexual Events EWAS") +
theme(plot.title = element_text(hjust = 0.5)) +
xlim(-0.1,0.1) +
ylim(0,15) +
xlab("Effect of sexual trauma on DNAm at a given probe") +
ylab("-log10 p value")
baby_achronic_volcano <- ggplot(baby_achronic_man2,aes(x= coef, y= p_log10, label = probe)) +
theme_bw() +
geom_point(alpha = 0.6, shape = 20,
color = ifelse(baby_achronic_man2$p_log10 > -log10(0.05/nrow(baby_achronic_man2)),
'green4','black')) +
geom_hline(yintercept= -log10(0.05/nrow(baby_achronic_man2)), col="red") +
geom_label_repel(aes(x= coef, y= p_log10, label = probe),
data = baby_achronic_man2[baby_achronic_man2$p_log10 >
-log10(0.05/nrow(baby_achronic_man2)), ],
box.padding = 0.6,
max.overlaps = Inf) +
ggtitle("Congo Babies Chronic Stress EWAS") +
theme(plot.title = element_text(hjust = 0.5)) +
xlim(-0.1,0.1) +
ylim(0,15) +
xlab("Effect of chronic stress on DNAm at a given probe") +
ylab("-log10 p value")
baby_awar_nr_volcano <- ggplot(baby_awar_nr_man2,aes(x= coef, y= p_log10, label = probe)) +
theme_bw() +
geom_point(alpha = 0.6, shape = 20,
color = ifelse(baby_awar_nr_man2$p_log10 > -log10(0.05/nrow(baby_awar_nr_man2)),
'green4','black')) +
geom_hline(yintercept= -log10(0.05/nrow(baby_awar_nr_man2)), col="red") +
geom_label_repel(aes(x= coef, y= p_log10, label = probe),
data = baby_awar_nr_man2[baby_awar_nr_man2$p_log10 >
-log10(0.05/nrow(baby_awar_nr_man2)), ],
box.padding = 0.6,
max.overlaps = Inf) +
ggtitle("Congo Babies War Stress EWAS") +
theme(plot.title = element_text(hjust = 0.5)) +
xlim(-0.1,0.1) +
ylim(0,15) +
xlab("Effect of war stress on DNAm at a given probe") +
ylab("-log10 p value")
# Print all manhattan plots
# mom_gtsum_man
# mom_setot_man
# mom_awar_nr_man
# mom_achronic_man
#
# baby_gtsum_man
# baby_setot_man
# baby_awar_nr_man
# baby_achronic_man
# Print all volcano plots
# mom_gtsum_volcano
# mom_setot_volcano
# mom_awar_nr_volcano
# mom_achronic_volcano
#
# baby_gtsum_volcano
# baby_setot_volcano
# baby_awar_nr_volcano
# baby_achronic_volcano
Get the bonferroni significant cutoffs for each test.
cat("The bonferroni significance cutoffs for mothers are: ")
## The bonferroni significance cutoffs for mothers are:
cat("mothers general trauma",0.05/nrow(mom_gtsum_man2)," ")
## mothers general trauma 7.072336e-08
cat("mothers sexual trauma",0.05/nrow(mom_setot_man2)," ")
## mothers sexual trauma 7.072336e-08
cat("mothers war stress",0.05/nrow(mom_awar_nr_man2)," ")
## mothers war stress 7.072326e-08
cat("mothers chronic stress",0.05/nrow(mom_achronic_man2)," ")
## mothers chronic stress 7.072336e-08
cat("The bonferroni significance cutoffs for babies are: ")
## The bonferroni significance cutoffs for babies are:
cat("babies general trauma",0.05/nrow(baby_gtsum_man2)," ")
## babies general trauma 7.226822e-08
cat("babies sexual trauma",0.05/nrow(baby_setot_man2)," ")
## babies sexual trauma 7.226822e-08
cat("babies war stress",0.05/nrow(baby_awar_nr_man2)," ")
## babies war stress 7.226822e-08
cat("babies chronic stress",0.05/nrow(baby_achronic_man2)," ")
## babies chronic stress 7.226822e-08
Get the final number of probes in each test:
cat("The final numbers of probes in each test for mothers are: ")
## The final numbers of probes in each test for mothers are:
cat("mothers general trauma",nrow(mom_gtsum_man2)," ")
## mothers general trauma 706980
cat("mothers sexual trauma",nrow(mom_setot_man2)," ")
## mothers sexual trauma 706980
cat("mothers war stress",nrow(mom_awar_nr_man2)," ")
## mothers war stress 706981
cat("mothers chronic stress",nrow(mom_achronic_man2)," ")
## mothers chronic stress 706980
cat("The final numbers of probes in each test for babies are: ")
## The final numbers of probes in each test for babies are:
cat("babies general trauma",nrow(baby_gtsum_man2)," ")
## babies general trauma 691867
cat("babies sexual trauma",nrow(baby_setot_man2)," ")
## babies sexual trauma 691867
cat("babies war stress",nrow(baby_awar_nr_man2)," ")
## babies war stress 691867
cat("babies chronic stress",nrow(baby_achronic_man2)," ")
## babies chronic stress 691867
Plot the data for genome-wide significant sites.
# Get significant sites for those analyses with significant sites.
mom_sig_gtsum <- mom_gtsum_man2 %>%
filter(pval < (0.05/nrow(mom_gtsum_man2))) %>%
select(probe,pval) %>%
mutate(exposure = c("general_trauma")) %>%
mutate(generation = c("mother"))
mom_sig_setot <- mom_setot_man2 %>%
filter(pval < (0.05/nrow(mom_setot_man2))) %>%
select(probe,pval) %>%
mutate(exposure = c("sexual_events"))%>%
mutate(generation = c("mother"))
mom_sig_awar_nr <- mom_awar_nr_man2 %>%
filter(pval < (0.05/nrow(mom_awar_nr_man2))) %>%
select(probe,pval) %>%
mutate(exposure = c("war_stress"))%>%
mutate(generation = c("mother"))
mom_sig_achronic <- mom_achronic_man2 %>%
filter(pval < (0.05/nrow(mom_achronic_man2))) %>%
select(probe,pval) %>%
mutate(exposure = c("chronic_stress"))%>%
mutate(generation = c("mother"))
baby_sig_gtsum <- baby_gtsum_man2 %>%
filter(pval < (0.05/nrow(baby_gtsum_man2))) %>%
select(probe,pval) %>%
mutate(exposure = c("general_trauma")) %>%
mutate(generation = c("baby"))
baby_sig_setot <- baby_setot_man2 %>%
filter(pval < (0.05/nrow(baby_setot_man2))) %>%
select(probe,pval) %>%
mutate(exposure = c("sexual_events"))%>%
mutate(generation = c("baby"))
baby_sig_awar_nr <- baby_awar_nr_man2 %>%
filter(pval < (0.05/nrow(baby_awar_nr_man2))) %>%
select(probe,pval) %>%
mutate(exposure = c("war_stress"))%>%
mutate(generation = c("baby"))
baby_sig_achronic <- baby_achronic_man2 %>%
filter(pval < (0.05/nrow(baby_achronic_man2))) %>%
select(probe,pval) %>%
mutate(exposure = c("chronic_stress")) %>%
mutate(generation = c("baby"))
mom_all_sig <- bind_rows(mom_sig_gtsum,
mom_sig_setot,
mom_sig_awar_nr,
mom_sig_achronic)
baby_all_sig <- bind_rows(baby_sig_gtsum,
baby_sig_setot,
baby_sig_awar_nr,
baby_sig_achronic)
# Any overlapping and significant sites? Nope.
table(baby_all_sig$probe %in% mom_all_sig$probe)
##
## FALSE
## 11
# Are all the probes in each list unique, or do
# they come up with multiple exposures?
length(mom_all_sig$probe) == length(unique(mom_all_sig$probe))
## [1] TRUE
length(baby_all_sig$probe) == length(unique(baby_all_sig$probe))
## [1] TRUE
# All significant sites are unique. They do not show up in
# any other analyses as significant hits.
# Scatter plots of raw data for significant sites
# subset mom_betas and add to dfm
# Figure out how to subset the significant
# probes for the exposure they are associated with.
dfm_sig <- as.data.frame(mom_betas) %>%
select(any_of(mom_all_sig$probe)) %>%
rownames_to_column(var = "methylation_id")
dfm_sig_gtsum <- dfm_sig %>%
select(methylation_id, any_of(
mom_all_sig[mom_all_sig$exposure=="general_trauma",]$probe)) %>%
right_join(dfm, by = c("methylation_id" = "methylation_id"))
dfm_sig_setot <- dfm_sig %>%
select(methylation_id, any_of(
mom_all_sig[mom_all_sig$exposure=="sexual_events",]$probe)) %>%
right_join(dfm, by = c("methylation_id" = "methylation_id"))
dfm_sig_awar_nr <- dfm_sig %>%
select(methylation_id, any_of(
mom_all_sig[mom_all_sig$exposure=="war_stress",]$probe)) %>%
right_join(dfm, by = c("methylation_id" = "methylation_id"))
# scatter plot mothers gtsum
mom_gtsum_scatter <-
dfm_sig_gtsum %>%
select(gtsum,cohort,starts_with("cg")) %>%
pivot_longer(3:ncol(.), names_to = "CpG", values_to = "beta") %>%
ggplot(aes(x = gtsum, y = beta, color = cohort)) +
geom_point(alpha = 0.5, position = position_jitter(w = 0.1)) +
geom_smooth(method = "lm", se = F) +
facet_wrap(~ CpG, scales = "free") +
ggtitle("General Trauma - Mothers - Genome Wide Significant Sites",
subtitle = "Raw data are plotted") +
theme(plot.title = element_text(hjust = 0.5)) +
theme_bw() +
scale_color_brewer(palette = "Dark2") +
labs(color = "Cohort")
# scatter plot mothers setot
mom_setot_scatter <-
dfm_sig_setot %>%
select(setot,cohort,starts_with("cg")) %>%
pivot_longer(3:ncol(.), names_to = "CpG", values_to = "beta") %>%
ggplot(aes(x = setot, y = beta, color = cohort)) +
geom_point(alpha = 0.5, position = position_jitter(w = 0.1)) +
geom_smooth(method = "lm", se = F) +
facet_wrap(~ CpG, scales = "free") +
ggtitle("Sexual violence - Mothers - Genome Wide Significant Sites",
subtitle = "Raw data are plotted") +
theme(plot.title = element_text(hjust = 0.5)) +
theme_bw() +
scale_color_brewer(palette = "Dark2") +
labs(color = "Cohort")
# scatter plot mothers awar_nr
mom_awar_nr_scatter <-
dfm_sig_awar_nr %>%
select(awar_nr,cohort,starts_with("cg")) %>%
pivot_longer(3:ncol(.), names_to = "CpG", values_to = "beta") %>%
ggplot(aes(x = awar_nr, y = beta, color = cohort)) +
geom_point(alpha = 0.5, position = position_jitter(w = 0.1)) +
geom_smooth(method = "lm", se = F) +
facet_wrap(~ CpG, scales = "free") +
ggtitle("War Stress - Mothers - Genome Wide Significant Sites",
subtitle = "Raw data are plotted") +
theme(plot.title = element_text(hjust = 0.5)) +
theme_bw() +
scale_color_brewer(palette = "Dark2") +
labs(color = "Cohort")
# No significant hits for chronic stress
######################################
# Babies scatter plots for significant
# hits.
######################################
# subset baby_betas and add to dfb
dfb_sig <- as.data.frame(baby_betas) %>%
select(any_of(baby_all_sig$probe)) %>%
rownames_to_column(var = "methylation_id")
dfb_sig_gtsum <- dfb_sig %>%
select(methylation_id, any_of(
baby_all_sig[baby_all_sig$exposure=="general_trauma",]$probe)) %>%
right_join(dfb, by = c("methylation_id" = "methylation_id"))
dfb_sig_setot <- dfb_sig %>%
select(methylation_id, any_of(
baby_all_sig[baby_all_sig$exposure=="sexual_events",]$probe)) %>%
right_join(dfb, by = c("methylation_id" = "methylation_id"))
dfb_sig_awar_nr <- dfb_sig %>%
select(methylation_id, any_of(
baby_all_sig[baby_all_sig$exposure=="war_stress",]$probe)) %>%
right_join(dfb, by = c("methylation_id" = "methylation_id"))
# scatter plot Babies gtsum
baby_gtsum_scatter <-
dfb_sig_gtsum %>%
select(gtsum,cohort,starts_with("cg")) %>%
pivot_longer(3:ncol(.), names_to = "CpG", values_to = "beta") %>%
ggplot(aes(x = gtsum, y = beta, color = cohort)) +
geom_point(alpha = 0.5, position = position_jitter(w = 0.1)) +
geom_smooth(method = "lm", se = F) +
facet_wrap(~ CpG, scales = "free") +
ggtitle("General Trauma - Babies - Genome Wide Significant Sites",
subtitle = "Raw data are plotted") +
theme(plot.title = element_text(hjust = 0.5)) +
theme_bw() +
scale_color_brewer(palette = "Dark2") +
labs(color = "Cohort")
# scatter plot Babies setot
baby_setot_scatter <-
dfb_sig_setot %>%
select(setot,cohort,starts_with("cg")) %>%
pivot_longer(3:ncol(.), names_to = "CpG", values_to = "beta") %>%
ggplot(aes(x = setot, y = beta, color = cohort)) +
geom_point(alpha = 0.5, position = position_jitter(w = 0.1)) +
geom_smooth(method = "lm", se = F) +
facet_wrap(~ CpG, scales = "free") +
ggtitle("Sexual Events - Babies - Genome Wide Significant Sites",
subtitle = "Raw data are plotted") +
theme(plot.title = element_text(hjust = 0.5)) +
theme_bw() +
scale_color_brewer(palette = "Dark2") +
labs(color = "Cohort")
# scatter plot Babies awar_nr
baby_awar_nr_scatter <-
dfb_sig_awar_nr %>%
select(awar_nr,cohort,starts_with("cg")) %>%
pivot_longer(3:ncol(.), names_to = "CpG", values_to = "beta") %>%
ggplot(aes(x = awar_nr, y = beta, color = cohort)) +
geom_point(alpha = 0.5, position = position_jitter(w = 0.1)) +
geom_smooth(method = "lm", se = F) +
facet_wrap(~ CpG, scales = "free") +
ggtitle("War Stress - Babies - Genome Wide Significant Sites",
subtitle = "Raw data are plotted") +
theme(plot.title = element_text(hjust = 0.5)) +
theme_bw() +
scale_color_brewer(palette = "Dark2") +
labs(color = "Cohort")
zhou2 %>%
select(probeID,distToTSS,CGIposition) %>%
right_join(mom_all_sig, by = c("probeID" = "probe")) %>%
select(probeID,pval,exposure,generation,CGIposition,distToTSS) %>%
left_join(illumina2, by = c("probeID")) %>%
arrange(exposure) %>%
kable() %>%
kable_styling(full_width = FALSE,
bootstrap_options = c("striped", "hover"))
| probeID | pval | exposure | generation | CGIposition | distToTSS | gene_context |
|---|---|---|---|---|---|---|
| cg11408019 | 0e+00 | general_trauma | mother | Island | NA | |
| cg14519777 | 1e-07 | general_trauma | mother | NA | 2305;4799;6420;6414;6417;6404;6521;6465;4791;6416;4785;4785;4785;3504;6417;6418;6471;6471 | Body;Body;Body |
| cg14282695 | 0e+00 | general_trauma | mother | NA | 123507;123507;123991;3098;38436;123487;3099;3122;123507 | Body;Body |
| cg16543391 | 1e-07 | general_trauma | mother | N_Shelf | -76;5444;4254;-78;-369;6129;-72;-82;-76;2908;-70;-600;-76;-231;658;-386 | TSS1500;TSS200;TSS200;Body;Body |
| cg21219607 | 1e-07 | sexual_events | mother | NA | -354;37611;20737;37633;-354 | TSS1500;TSS1500;Body;Body |
| cg06308131 | 0e+00 | sexual_events | mother | S_Shore | 26967;26967;60014;60014;26967;26967;26967;60046;60086;2151;60046;677;1260;60046;60046;60046;60046;60046;60046;60046 | Body;Body;Body |
| cg04358942 | 0e+00 | sexual_events | mother | NA | 39109;-539 | |
| cg23527517 | 0e+00 | sexual_events | mother | NA | 5801 | Body |
| cg14859642 | 1e-07 | sexual_events | mother | NA | NA | |
| cg00489624 | 0e+00 | sexual_events | mother | NA | 101087;40319;101132 | 5’UTR |
| cg24308336 | 0e+00 | sexual_events | mother | N_Shelf | 15350;40412 | Body |
| cg16765764 | 0e+00 | sexual_events | mother | NA | 24584;24584;24629;24568;24578 | Body;Body;Body |
| cg10897169 | 1e-07 | sexual_events | mother | NA | 74092;73992;73916;73915;73955 | Body;Body;Body |
| cg13740840 | 1e-07 | war_stress | mother | Island | 18576;-189;-213;-205 | TSS1500;TSS200 |
| cg26486174 | 0e+00 | war_stress | mother | Island | NA |
zhou2 %>%
select(probeID,distToTSS,CGIposition) %>%
right_join(baby_all_sig, by = c("probeID" = "probe")) %>%
select(probeID,pval,exposure,generation,CGIposition,distToTSS) %>%
left_join(illumina2, by = c("probeID")) %>%
arrange(exposure) %>%
kable() %>%
kable_styling(full_width = FALSE,
bootstrap_options = c("striped", "hover"))
| probeID | pval | exposure | generation | CGIposition | distToTSS | gene_context |
|---|---|---|---|---|---|---|
| cg24590750 | 0e+00 | general_trauma | baby | Island | -1200;-1120;-1120;-1091;-66;-182;-66 | TSS1500;TSS200 |
| cg10783680 | 1e-07 | general_trauma | baby | Island | -58;-7;-10;-7;-19;-61;-527;-483;-230;34;17823;-58;-61 | 5’UTR;1stExon;5’UTR;1stExon;5’UTR;1stExon;5’UTR;1stExon;1stExon;5’UTR;TSS200 |
| cg10338475 | 0e+00 | sexual_events | baby | S_Shore | -598;-598;997 | TSS1500;Body |
| cg11386818 | 0e+00 | sexual_events | baby | Island | 1158;1053;209;1089;1145;1130;1945;2034;341;1057;1136;2007;2230;1959;1139;1091;1118;1145;1178;2210 | TSS1500;5’UTR;5’UTR;5’UTR;TSS1500;5’UTR |
| cg02176407 | 1e-07 | sexual_events | baby | Island | 304 | 1stExon |
| cg20807701 | 0e+00 | sexual_events | baby | N_Shore | -677;-670;470;257;473;510;554;60;401;492;449 | Body;TSS1500;5’UTR;1stExon |
| cg09631059 | 0e+00 | sexual_events | baby | N_Shore | 2854;9745;14118;14090;12496;12861;14001;14130;8132;12825;9745;9745;12789;13603;9745;7126;13627;9745 | 5’UTR;Body;Body;Body;Body;Body;Body |
| cg06873316 | 0e+00 | sexual_events | baby | Island | -435;-415;-450;-525 | TSS1500;TSS1500 |
| cg08985979 | 0e+00 | war_stress | baby | S_Shore | 2736;2773;3078 | Body |
| cg21172322 | 0e+00 | war_stress | baby | N_Shore | -135;46928;46913;123;902;46870;215;46876 | Body |
| cg00741900 | 0e+00 | war_stress | baby | Island | 109;-1067;-1084;-1056;-1048;-1037;-1153;-1056;-1056;-1028;-1037 | 5’UTR;1stExon;TSS1500 |
Test for interactions by cohort and stressor for genome-wide significant hits.
mom_gtsum_int <- lapply(dfm_sig_gtsum[grep("cg",colnames(dfm_sig_gtsum))], function(x){
mod_sum(rlm(x ~ Age + bmi + parous + pcsec + palco + cohort + PC1_cells +
gtsum*cohort,
data = dfm_sig_gtsum))
})
mom_setot_int <- lapply(dfm_sig_setot[grep("cg",colnames(dfm_sig_setot))], function(x){
mod_sum(rlm(x ~ Age + bmi + parous + pcsec + palco + cohort + PC1_cells +
setot*cohort,
data = dfm_sig_setot))
})
mom_awar_nr_int <- lapply(dfm_sig_awar_nr[grep("cg",colnames(dfm_sig_awar_nr))], function(x){
mod_sum(rlm(x ~ Age + bmi + parous + pcsec + palco + cohort + PC1_cells +
awar_nr*cohort,
data = dfm_sig_awar_nr))
})
######################################
# Test for interactions among babies
# between cohort and maternal stress
######################################
baby_gtsum_int <- lapply(dfb_sig_gtsum[grep("cg",colnames(dfb_sig_gtsum))], function(x){
mod_sum(rlm(x ~ age + bmi + parous + pcsec + palco + sex + ga_meth + cohort + PC1_cells + PC2_cells +
gtsum*cohort,
data = dfb_sig_gtsum))
})
baby_setot_int <- lapply(dfb_sig_setot[grep("cg",colnames(dfb_sig_setot))], function(x){
mod_sum(rlm(x ~ age + bmi + parous + pcsec + palco + sex + ga_meth + cohort + PC1_cells + PC2_cells +
setot*cohort,
data = dfb_sig_setot))
})
baby_awar_nr_int <- lapply(dfb_sig_awar_nr[grep("cg",colnames(dfb_sig_awar_nr))], function(x){
mod_sum(rlm(x ~ age + bmi + parous + pcsec + palco + sex + ga_meth + cohort + PC1_cells + PC2_cells +
awar_nr*cohort,
data = dfb_sig_awar_nr))
})
Note that no significant interactions were found for maternal stress and cohort for the significant probes. The effect is statistically the same across cohorts.
mom_gtsum_pred <- lapply(dfm_sig_gtsum[grep("cg",colnames(dfm_sig_gtsum))], function(x){
prediction(rlm(x ~ Age + bmi + parous + pcsec + palco + cohort + PC1_cells +
gtsum,
data = dfm_sig_gtsum), vcov. = sandwich)
})
mom_gtsum_pred_plots <- lapply(mom_gtsum_pred, function(x) {
ggplot(x, aes(x = gtsum, y = fitted)) +
geom_point(shape = 20, alpha = 2/3) +
geom_smooth(method = "lm", se = TRUE, color = "red", fill = "salmon") +
theme_bw() +
xlab("General Trauma") +
ggtitle("Predicted methylation among Mothers")
})
# Add individual y labels:
mom_gtsum_pred_plots$cg11408019 <- mom_gtsum_pred_plots$cg11408019 + ylab("DNAm at cg11408019")
mom_gtsum_pred_plots$cg14519777 <- mom_gtsum_pred_plots$cg14519777 + ylab("DNAm at cg14519777")
mom_gtsum_pred_plots$cg14282695 <- mom_gtsum_pred_plots$cg14282695 + ylab("DNAm at cg14282695")
mom_gtsum_pred_plots$cg16543391 <- mom_gtsum_pred_plots$cg16543391 + ylab("DNAm at cg16543391")
mom_setot_pred <- lapply(dfm_sig_setot[grep("cg",colnames(dfm_sig_setot))], function(x){
prediction(rlm(x ~ Age + bmi + parous + pcsec + palco + cohort + PC1_cells +
setot,
data = dfm_sig_setot), vcov. = sandwich)
})
mom_setot_pred_plots <- lapply(mom_setot_pred, function(x) {
ggplot(x, aes(x = setot, y = fitted)) +
geom_point(shape = 20, alpha = 2/3) +
geom_smooth(method = "lm", se = TRUE, color = "red", fill = "salmon") +
theme_bw() +
xlab("Sexual Events") +
ggtitle("Predicted methylation among Mothers")
})
# Add individual y labels:
mom_setot_pred_plots$cg21219607 <- mom_setot_pred_plots$cg21219607 + ylab("DNAm at cg21219607")
mom_setot_pred_plots$cg06308131 <- mom_setot_pred_plots$cg06308131 + ylab("DNAm at cg06308131")
mom_setot_pred_plots$cg04358942 <- mom_setot_pred_plots$cg04358942 + ylab("DNAm at cg04358942")
mom_setot_pred_plots$cg23527517 <- mom_setot_pred_plots$cg23527517 + ylab("DNAm at cg23527517")
mom_setot_pred_plots$cg14859642 <- mom_setot_pred_plots$cg14859642 + ylab("DNAm at cg14859642")
mom_setot_pred_plots$cg00489624 <- mom_setot_pred_plots$cg00489624 + ylab("DNAm at cg00489624")
mom_setot_pred_plots$cg24308336 <- mom_setot_pred_plots$cg24308336 + ylab("DNAm at cg24308336")
mom_setot_pred_plots$cg16765764 <- mom_setot_pred_plots$cg16765764 + ylab("DNAm at cg16765764")
mom_setot_pred_plots$cg10897169 <- mom_setot_pred_plots$cg10897169 + ylab("DNAm at cg10897169")
mom_awar_nr_pred <- lapply(dfm_sig_awar_nr[grep("cg",colnames(dfm_sig_awar_nr))], function(x){
prediction(rlm(x ~ Age + bmi + parous + pcsec + palco + cohort + PC1_cells +
awar_nr,
data = dfm_sig_awar_nr), vcov. = sandwich)
})
mom_awar_nr_pred_plots <- lapply(mom_awar_nr_pred, function(x) {
ggplot(x, aes(x = awar_nr, y = fitted)) +
geom_point(shape = 20, alpha = 2/3) +
geom_smooth(method = "lm", se = TRUE, color = "red", fill = "salmon") +
theme_bw() +
xlab("War Stress") +
ggtitle("Predicted methylation among Mothers")
})
mom_awar_nr_pred_plots$cg13740840 <- mom_awar_nr_pred_plots$cg13740840 +
ylab("DNAm at cg13740840")
mom_awar_nr_pred_plots$cg26486174 <- mom_awar_nr_pred_plots$cg26486174 +
ylab("DNAm at cg26486174")
##############################
# Babies predicted methylation
# plots
##############################
baby_gtsum_pred <- lapply(dfb_sig_gtsum[grep("cg",colnames(dfb_sig_gtsum))], function(x){
prediction(rlm(x ~ age + bmi + parous + pcsec + palco + sex + ga_meth + cohort + PC1_cells + PC2_cells +
gtsum,
data = dfb_sig_gtsum), vcov. = sandwich)
})
baby_gtsum_pred_plots <- lapply(baby_gtsum_pred, function(x) {
ggplot(x, aes(x = gtsum, y = fitted)) +
geom_point(shape = 20, alpha = 2/3) +
geom_smooth(method = "lm", se = TRUE, color = "red", fill = "salmon") +
theme_bw() +
xlab("General Trauma") +
ggtitle("Predicted methylation among babies")
})
# Add y-axis labels:
baby_gtsum_pred_plots$cg24590750 <- baby_gtsum_pred_plots$cg24590750 + ylab("DNAm at cg24590750")
baby_gtsum_pred_plots$cg10783680 <- baby_gtsum_pred_plots$cg10783680 + ylab("DNAm at cg10783680")
baby_setot_pred <- lapply(dfb_sig_setot[grep("cg",colnames(dfb_sig_setot))], function(x){
prediction(rlm(x ~ age + bmi + parous + pcsec + palco + sex + ga_meth + cohort + PC1_cells + PC2_cells +
setot,
data = dfb_sig_setot), vcov. = sandwich)
})
baby_setot_pred_plots <- lapply(baby_setot_pred, function(x) {
ggplot(x, aes(x = setot, y = fitted)) +
geom_point(shape = 20, alpha = 2/3) +
geom_smooth(method = "lm", se = TRUE, color = "red", fill = "salmon") +
theme_bw() +
xlab("Sexual Violence") +
ggtitle("Predicted methylation among babies")
})
# Add y-axis labels:
baby_setot_pred_plots$cg10338475 <- baby_setot_pred_plots$cg10338475 + ylab("DNAm at cg10338475")
baby_setot_pred_plots$cg11386818 <- baby_setot_pred_plots$cg11386818 + ylab("DNAm at cg11386818")
baby_setot_pred_plots$cg02176407 <- baby_setot_pred_plots$cg02176407 + ylab("DNAm at cg02176407")
baby_setot_pred_plots$cg20807701 <- baby_setot_pred_plots$cg20807701 + ylab("DNAm at cg20807701")
baby_setot_pred_plots$cg09631059 <- baby_setot_pred_plots$cg09631059 + ylab("DNAm at cg09631059")
baby_setot_pred_plots$cg06873316 <- baby_setot_pred_plots$cg06873316 + ylab("DNAm at cg06873316")
baby_awar_nr_pred <- lapply(dfb_sig_awar_nr[grep("cg",colnames(dfb_sig_awar_nr))], function(x){
prediction(rlm(x ~ age + bmi + parous + pcsec + palco + sex + ga_meth + cohort + PC1_cells + PC2_cells +
awar_nr,
data = dfb_sig_awar_nr), vcov. = sandwich)
})
baby_awar_nr_pred_plots <- lapply(baby_awar_nr_pred, function(x) {
ggplot(x, aes(x = awar_nr, y = fitted)) +
geom_point(shape = 20, alpha = 2/3) +
geom_smooth(method = "lm", se = TRUE, color = "red", fill = "salmon") +
theme_bw() +
xlab("General Trauma") +
ggtitle("Predicted methylation among babies")
})
# Add y-axis labels:
baby_awar_nr_pred_plots$cg08985979 <- baby_awar_nr_pred_plots$cg08985979 +
ylab("DNAm at cg08985979")
baby_awar_nr_pred_plots$cg21172322 <- baby_awar_nr_pred_plots$cg21172322 +
ylab("DNAm at cg21172322")
baby_awar_nr_pred_plots$cg00741900 <- baby_awar_nr_pred_plots$cg00741900 +
ylab("DNAm at cg00741900")
# mom_gtsum_pred_plots
# mom_setot_pred_plots
# mom_awar_nr_pred_plots
# baby_gtsum_pred_plots
# baby_setot_pred_plots
# baby_awar_nr_pred_plots
Are any of the top 10 methylation PCs associated with maternal stress in a multivariate analysis?
mom_pc_tests <- dfm %>%
select(methylation_id,age,parous,pcsec,palco,ga_meth,cohort,
PC1_cells,bmi,gtsum,setot,awar_nr,achronic,PC1:PC10)
mom_pc_gtsum_res <- apply(mom_pc_tests[14:ncol(mom_pc_tests)],2, function(x){
summary(lm(x ~ age + bmi + parous + pcsec + palco + cohort + PC1_cells + cohort +
gtsum,
data = mom_pc_tests))
}) # Association with PC6 only, p = 0.042.
mom_pc_setot_res <- apply(mom_pc_tests[14:ncol(mom_pc_tests)],2, function(x){
summary(lm(x ~ age + bmi + parous + pcsec + palco + cohort + PC1_cells + cohort +
setot,
data = mom_pc_tests))
})
mom_pc_awar_nr_res <- apply(mom_pc_tests[14:ncol(mom_pc_tests)],2, function(x){
summary(lm(x ~ age + bmi + parous + pcsec + palco + cohort + PC1_cells + cohort +
awar_nr,
data = mom_pc_tests))
})
mom_pc_achronic_res <- apply(mom_pc_tests[14:ncol(mom_pc_tests)],2, function(x){
summary(lm(x ~ age + bmi + parous + pcsec + palco + cohort + PC1_cells + cohort +
achronic,
data = mom_pc_tests))
})
baby_pc_tests <- dfb %>%
select(methylation_id,age,parous,pcsec,sex,palco,ga_meth,cohort,bmi,
PC1_cells,PC2_cells,gtsum,setot,awar_nr,achronic,PC1:PC10)
baby_pc_gtsum_res <- apply(baby_pc_tests[16:ncol(baby_pc_tests)],2, function(x){
summary(lm(x ~ age + bmi + sex + parous + pcsec + palco + cohort +
PC1_cells + PC2_cells + ga_meth + gtsum,
data = baby_pc_tests))
})
baby_pc_setot_res <- apply(baby_pc_tests[16:ncol(baby_pc_tests)],2, function(x){
summary(lm(x ~ age + bmi + sex + parous + pcsec + palco + cohort +
PC1_cells + PC2_cells + ga_meth + setot,
data = baby_pc_tests))
})
baby_pc_awar_nr_res <- apply(baby_pc_tests[16:ncol(baby_pc_tests)],2, function(x){
summary(lm(x ~ age + bmi + sex + parous + pcsec + palco + cohort +
PC1_cells + PC2_cells + ga_meth + awar_nr,
data = baby_pc_tests))
})
baby_pc_achronic_res <- apply(baby_pc_tests[16:ncol(baby_pc_tests)],2, function(x){
summary(lm(x ~ age + bmi + sex + parous + pcsec + palco + cohort +
PC1_cells + PC2_cells + ga_meth + achronic,
data = baby_pc_tests))
})
Is general mean methylation associated with maternal stress in a multivariate analysis?
mom_gmm <- rowMeans(mom_betas, na.rm = T)
mom_gmm <- cbind.data.frame(names(mom_gmm),mom_gmm)
colnames(mom_gmm) <- c("methylation_id","gmm")
dfm <- dfm %>%
left_join(mom_gmm, by = c("methylation_id" = "methylation_id"))
model_gmm_mom_gtsum <- lm(gmm ~ bmi + age + pcsec + palco + parous + cohort + PC1_cells +
gtsum, data = dfm)
summary(model_gmm_mom_gtsum)
##
## Call:
## lm(formula = gmm ~ bmi + age + pcsec + palco + parous + cohort +
## PC1_cells + gtsum, data = dfm)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0079520 -0.0017509 0.0001806 0.0017179 0.0070797
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.200e-01 2.548e-03 243.293 <2e-16 ***
## bmi 1.726e-05 7.195e-05 0.240 0.811
## age -3.705e-05 5.836e-05 -0.635 0.527
## pcsec 4.712e-05 6.209e-04 0.076 0.940
## palco -2.067e-04 7.353e-04 -0.281 0.779
## parous -2.865e-04 7.781e-04 -0.368 0.713
## cohortSV -6.775e-04 6.812e-04 -0.995 0.322
## PC1_cells -3.313e-03 2.263e-03 -1.464 0.146
## gtsum 3.898e-05 1.471e-04 0.265 0.791
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.002974 on 136 degrees of freedom
## (6 observations deleted due to missingness)
## Multiple R-squared: 0.03119, Adjusted R-squared: -0.0258
## F-statistic: 0.5473 on 8 and 136 DF, p-value: 0.819
model_gmm_mom_setot <- lm(gmm ~ bmi + age + pcsec + palco + parous + cohort + PC1_cells +
setot, data = dfm)
summary(model_gmm_mom_setot)
##
## Call:
## lm(formula = gmm ~ bmi + age + pcsec + palco + parous + cohort +
## PC1_cells + setot, data = dfm)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0080284 -0.0018798 0.0003063 0.0018082 0.0069252
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.207e-01 2.560e-03 242.480 <2e-16 ***
## bmi 1.678e-05 7.161e-05 0.234 0.815
## age -5.171e-05 5.936e-05 -0.871 0.385
## pcsec 1.151e-05 6.187e-04 0.019 0.985
## palco -1.891e-04 7.317e-04 -0.258 0.796
## parous -3.638e-04 7.774e-04 -0.468 0.641
## cohortSV -6.072e-04 6.798e-04 -0.893 0.373
## PC1_cells -2.800e-03 2.294e-03 -1.221 0.224
## setot -1.901e-04 1.637e-04 -1.161 0.248
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.002961 on 136 degrees of freedom
## (6 observations deleted due to missingness)
## Multiple R-squared: 0.04021, Adjusted R-squared: -0.01625
## F-statistic: 0.7121 on 8 and 136 DF, p-value: 0.6805
model_gmm_mom_awar_nr <- lm(gmm ~ bmi + age + pcsec + palco + parous + cohort + PC1_cells +
awar_nr, data = dfm)
summary(model_gmm_mom_awar_nr)
##
## Call:
## lm(formula = gmm ~ bmi + age + pcsec + palco + parous + cohort +
## PC1_cells + awar_nr, data = dfm)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.007961 -0.001770 0.000055 0.001850 0.006991
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.199e-01 2.548e-03 243.287 <2e-16 ***
## bmi 2.020e-05 7.217e-05 0.280 0.780
## age -3.825e-05 5.835e-05 -0.656 0.513
## pcsec 2.007e-05 6.234e-04 0.032 0.974
## palco -2.130e-04 7.345e-04 -0.290 0.772
## parous -3.085e-04 7.799e-04 -0.396 0.693
## cohortSV -6.636e-04 6.817e-04 -0.973 0.332
## PC1_cells -3.347e-03 2.263e-03 -1.479 0.142
## awar_nr 1.215e-04 2.711e-04 0.448 0.655
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.002973 on 136 degrees of freedom
## (6 observations deleted due to missingness)
## Multiple R-squared: 0.03212, Adjusted R-squared: -0.02482
## F-statistic: 0.5641 on 8 and 136 DF, p-value: 0.8057
model_gmm_mom_achronic <- lm(gmm ~ bmi + age + pcsec + palco + parous + cohort + PC1_cells +
achronic, data = dfm)
summary(model_gmm_mom_achronic)
##
## Call:
## lm(formula = gmm ~ bmi + age + pcsec + palco + parous + cohort +
## PC1_cells + achronic, data = dfm)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0081353 -0.0016530 0.0003181 0.0016814 0.0068304
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.212e-01 2.656e-03 233.841 <2e-16 ***
## bmi 6.822e-06 7.206e-05 0.095 0.925
## age -4.825e-05 5.868e-05 -0.822 0.412
## pcsec -7.591e-05 6.253e-04 -0.121 0.904
## palco -1.847e-04 7.312e-04 -0.253 0.801
## parous -4.045e-04 7.799e-04 -0.519 0.605
## cohortSV -3.006e-04 7.448e-04 -0.404 0.687
## PC1_cells -3.024e-03 2.262e-03 -1.337 0.184
## achronic -1.019e-04 8.184e-05 -1.245 0.215
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.002958 on 136 degrees of freedom
## (6 observations deleted due to missingness)
## Multiple R-squared: 0.04162, Adjusted R-squared: -0.01476
## F-statistic: 0.7382 on 8 and 136 DF, p-value: 0.6576
baby_gmm <- rowMeans(baby_betas, na.rm = T)
baby_gmm <- cbind.data.frame(names(baby_gmm),baby_gmm)
colnames(baby_gmm) <- c("methylation_id","gmm")
dfb <- dfb %>%
left_join(baby_gmm, by = c("methylation_id" = "methylation_id"))
model_gmm_baby_gtsum <- lm(gmm ~ bmi + age + pcsec + palco + parous + sex + ga_meth +
cohort + PC1_cells + PC2_cells + gtsum, data = dfb)
summary(model_gmm_baby_gtsum)
##
## Call:
## lm(formula = gmm ~ bmi + age + pcsec + palco + parous + sex +
## ga_meth + cohort + PC1_cells + PC2_cells + gtsum, data = dfb)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0139586 -0.0019797 -0.0000295 0.0021322 0.0077988
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.258e-01 1.200e-02 52.143 < 2e-16 ***
## bmi -6.690e-05 8.655e-05 -0.773 0.440913
## age -3.495e-06 7.006e-05 -0.050 0.960291
## pcsec -9.954e-04 7.313e-04 -1.361 0.175752
## palco 3.125e-03 8.865e-04 3.525 0.000582 ***
## parous 2.206e-04 9.138e-04 0.241 0.809601
## sexM -1.512e-03 6.221e-04 -2.431 0.016401 *
## ga_meth 3.859e-06 4.309e-05 0.090 0.928785
## cohortSV -1.296e-03 8.428e-04 -1.538 0.126407
## PC1_cells -5.943e-02 2.196e-03 -27.059 < 2e-16 ***
## PC2_cells -5.368e-02 3.113e-03 -17.244 < 2e-16 ***
## gtsum 1.680e-04 1.804e-04 0.931 0.353526
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.00353 on 133 degrees of freedom
## (6 observations deleted due to missingness)
## Multiple R-squared: 0.9072, Adjusted R-squared: 0.8996
## F-statistic: 118.3 on 11 and 133 DF, p-value: < 2.2e-16
model_gmm_baby_setot <- lm(gmm ~ bmi + age + pcsec + palco + parous + sex + ga_meth +
cohort + PC1_cells + PC2_cells + setot, data = dfb)
summary(model_gmm_baby_setot)
##
## Call:
## lm(formula = gmm ~ bmi + age + pcsec + palco + parous + sex +
## ga_meth + cohort + PC1_cells + PC2_cells + setot, data = dfb)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0136671 -0.0021212 -0.0001054 0.0021838 0.0079436
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.272e-01 1.195e-02 52.501 < 2e-16 ***
## bmi -6.695e-05 8.672e-05 -0.772 0.441485
## age 3.896e-06 7.187e-05 0.054 0.956854
## pcsec -9.474e-04 7.330e-04 -1.293 0.198398
## palco 3.071e-03 8.879e-04 3.458 0.000731 ***
## parous 2.612e-04 9.168e-04 0.285 0.776147
## sexM -1.456e-03 6.187e-04 -2.352 0.020118 *
## ga_meth -1.118e-06 4.317e-05 -0.026 0.979374
## cohortSV -1.389e-03 8.451e-04 -1.644 0.102533
## PC1_cells -5.931e-02 2.218e-03 -26.744 < 2e-16 ***
## PC2_cells -5.331e-02 3.078e-03 -17.322 < 2e-16 ***
## setot 1.168e-04 1.947e-04 0.600 0.549632
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.003536 on 133 degrees of freedom
## (6 observations deleted due to missingness)
## Multiple R-squared: 0.9069, Adjusted R-squared: 0.8992
## F-statistic: 117.8 on 11 and 133 DF, p-value: < 2.2e-16
model_gmm_baby_awar_nr <- lm(gmm ~ bmi + age + pcsec + palco + parous + sex + ga_meth +
cohort + PC1_cells + PC2_cells + awar_nr, data = dfb)
summary(model_gmm_baby_awar_nr)
##
## Call:
## lm(formula = gmm ~ bmi + age + pcsec + palco + parous + sex +
## ga_meth + cohort + PC1_cells + PC2_cells + awar_nr, data = dfb)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0134419 -0.0020071 -0.0000481 0.0021228 0.0077329
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.251e-01 1.208e-02 51.759 < 2e-16 ***
## bmi -5.740e-05 8.711e-05 -0.659 0.511076
## age -1.013e-05 7.015e-05 -0.144 0.885390
## pcsec -1.073e-03 7.377e-04 -1.454 0.148327
## palco 3.119e-03 8.857e-04 3.521 0.000589 ***
## parous 1.430e-04 9.175e-04 0.156 0.876410
## sexM -1.517e-03 6.217e-04 -2.441 0.015972 *
## ga_meth 6.304e-06 4.330e-05 0.146 0.884459
## cohortSV -1.268e-03 8.444e-04 -1.501 0.135682
## PC1_cells -5.933e-02 2.199e-03 -26.975 < 2e-16 ***
## PC2_cells -5.352e-02 3.081e-03 -17.374 < 2e-16 ***
## awar_nr 3.283e-04 3.280e-04 1.001 0.318680
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.003528 on 133 degrees of freedom
## (6 observations deleted due to missingness)
## Multiple R-squared: 0.9073, Adjusted R-squared: 0.8997
## F-statistic: 118.4 on 11 and 133 DF, p-value: < 2.2e-16
model_gmm_baby_achronic <- lm(gmm ~ bmi + age + pcsec + palco + parous + sex + ga_meth +
cohort + PC1_cells + PC2_cells + achronic, data = dfb)
summary(model_gmm_baby_achronic)
##
## Call:
## lm(formula = gmm ~ bmi + age + pcsec + palco + parous + sex +
## ga_meth + cohort + PC1_cells + PC2_cells + achronic, data = dfb)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.013766 -0.002232 0.000072 0.002145 0.008040
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.273e-01 1.202e-02 52.174 < 2e-16 ***
## bmi -6.925e-05 8.712e-05 -0.795 0.428100
## age -7.907e-06 7.109e-05 -0.111 0.911603
## pcsec -9.867e-04 7.374e-04 -1.338 0.183131
## palco 3.090e-03 8.884e-04 3.478 0.000683 ***
## parous 2.079e-04 9.222e-04 0.225 0.821956
## sexM -1.429e-03 6.196e-04 -2.307 0.022595 *
## ga_meth 5.693e-07 4.312e-05 0.013 0.989484
## cohortSV -1.268e-03 9.176e-04 -1.382 0.169232
## PC1_cells -5.944e-02 2.207e-03 -26.937 < 2e-16 ***
## PC2_cells -5.308e-02 3.076e-03 -17.258 < 2e-16 ***
## achronic -2.136e-05 9.481e-05 -0.225 0.822100
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.00354 on 133 degrees of freedom
## (6 observations deleted due to missingness)
## Multiple R-squared: 0.9067, Adjusted R-squared: 0.899
## F-statistic: 117.5 on 11 and 133 DF, p-value: < 2.2e-16
Is general mean methylation associated with birth weight in a multivariate analysis?
model_gmm_mom_bwgt <- lm(bwgt ~ bmi + age + pcsec + palco + parous + cohort + PC1_cells + ga_meth + sex + gmm, data = dfm)
summary(model_gmm_mom_bwgt)
##
## Call:
## lm(formula = bwgt ~ bmi + age + pcsec + palco + parous + cohort +
## PC1_cells + ga_meth + sex + gmm, data = dfm)
##
## Residuals:
## Min 1Q Median 3Q Max
## -951.98 -262.90 -41.36 269.98 1269.96
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 969.583 7549.626 0.128 0.898010
## bmi 27.737 10.034 2.764 0.006541 **
## age 18.971 8.283 2.290 0.023622 *
## pcsec 185.248 88.675 2.089 0.038666 *
## palco -96.423 104.069 -0.927 0.355900
## parous 63.750 109.800 0.581 0.562518
## cohortSV -105.136 97.619 -1.077 0.283490
## PC1_cells -146.513 317.691 -0.461 0.645447
## ga_meth 18.341 4.781 3.837 0.000194 ***
## sexM 150.266 70.839 2.121 0.035815 *
## gmm -6922.591 12027.295 -0.576 0.565906
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 411.8 on 129 degrees of freedom
## (11 observations deleted due to missingness)
## Multiple R-squared: 0.3903, Adjusted R-squared: 0.343
## F-statistic: 8.256 on 10 and 129 DF, p-value: 2.978e-10
model_gmm_baby_bwgt <- lm(bwgt ~ bmi + age + pcsec + palco + parous + sex + ga_meth +
cohort + PC1_cells + PC2_cells + gmm, data = dfb)
summary(model_gmm_baby_bwgt)
##
## Call:
## lm(formula = bwgt ~ bmi + age + pcsec + palco + parous + sex +
## ga_meth + cohort + PC1_cells + PC2_cells + gmm, data = dfb)
##
## Residuals:
## Min 1Q Median 3Q Max
## -908.05 -282.78 -35.77 251.19 1222.88
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2841.009 6539.612 0.434 0.664685
## bmi 26.048 10.168 2.562 0.011533 *
## age 16.889 8.223 2.054 0.041965 *
## pcsec 182.882 87.093 2.100 0.037647 *
## palco -67.572 108.159 -0.625 0.533212
## parous 43.828 107.156 0.409 0.683196
## sexM 160.551 73.678 2.179 0.031101 *
## ga_meth 19.839 5.026 3.947 0.000128 ***
## cohortSV -105.658 100.026 -1.056 0.292757
## PC1_cells -684.314 662.472 -1.033 0.303507
## PC2_cells -634.333 646.705 -0.981 0.328451
## gmm -10371.518 10189.110 -1.018 0.310586
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 412.7 on 132 degrees of freedom
## (7 observations deleted due to missingness)
## Multiple R-squared: 0.3927, Adjusted R-squared: 0.3421
## F-statistic: 7.76 on 11 and 132 DF, p-value: 2.965e-10
Are any of the stress measures correlated with birth weight in a multivariate analysis?
model_gtsum <- lm(bwgt ~ bmi + age + pcsec + palco + parous + cohort + ga_meth + sex +
gtsum, data = dfm)
summary(model_gtsum)
##
## Call:
## lm(formula = bwgt ~ bmi + age + pcsec + palco + parous + cohort +
## ga_meth + sex + gtsum, data = dfm)
##
## Residuals:
## Min 1Q Median 3Q Max
## -906.33 -276.85 -5.05 249.72 1350.42
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3183.342 1319.391 -2.413 0.017229 *
## bmi 27.539 9.960 2.765 0.006524 **
## age 18.684 8.151 2.292 0.023491 *
## pcsec 182.699 87.966 2.077 0.039778 *
## palco -97.647 103.207 -0.946 0.345840
## parous 60.819 106.162 0.573 0.567710
## cohortSV -105.061 96.697 -1.086 0.279274
## ga_meth 18.102 4.748 3.812 0.000212 ***
## sexM 159.603 70.503 2.264 0.025243 *
## gtsum -24.330 21.424 -1.136 0.258207
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 408.9 on 130 degrees of freedom
## (11 observations deleted due to missingness)
## Multiple R-squared: 0.394, Adjusted R-squared: 0.352
## F-statistic: 9.39 on 9 and 130 DF, p-value: 6.507e-11
model_setot <- lm(bwgt ~ bmi + age + pcsec + palco + parous + cohort + ga_meth + sex +
setot, data = dfm)
summary(model_setot)
##
## Call:
## lm(formula = bwgt ~ bmi + age + pcsec + palco + parous + cohort +
## ga_meth + sex + setot, data = dfm)
##
## Residuals:
## Min 1Q Median 3Q Max
## -998.48 -252.27 -12.67 252.08 1087.43
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3211.559 1312.740 -2.446 0.015763 *
## bmi 27.728 9.927 2.793 0.006005 **
## age 21.250 8.285 2.565 0.011460 *
## pcsec 189.762 87.760 2.162 0.032428 *
## palco -97.164 102.802 -0.945 0.346334
## parous 66.234 105.931 0.625 0.532896
## cohortSV -113.724 96.723 -1.176 0.241839
## ga_meth 17.636 4.748 3.715 0.000301 ***
## sexM 149.445 69.890 2.138 0.034364 *
## setot 33.159 22.322 1.485 0.139834
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 407.5 on 130 degrees of freedom
## (11 observations deleted due to missingness)
## Multiple R-squared: 0.3982, Adjusted R-squared: 0.3565
## F-statistic: 9.557 on 9 and 130 DF, p-value: 4.289e-11
model_awar_nr <- lm(bwgt ~ bmi + age + pcsec + palco + parous + cohort + ga_meth + sex +
awar_nr, data = dfm)
summary(model_awar_nr) # significant nega_methtive association. p = 0.002. b = -115.8
##
## Call:
## lm(formula = bwgt ~ bmi + age + pcsec + palco + parous + cohort +
## ga_meth + sex + awar_nr, data = dfm)
##
## Residuals:
## Min 1Q Median 3Q Max
## -886.92 -275.65 -2.93 249.10 1277.11
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2759.771 1285.624 -2.147 0.033679 *
## bmi 24.875 9.683 2.569 0.011328 *
## age 19.899 7.900 2.519 0.012988 *
## pcsec 214.687 85.759 2.503 0.013540 *
## palco -102.006 99.907 -1.021 0.309147
## parous 88.800 103.254 0.860 0.391360
## cohortSV -126.836 93.918 -1.350 0.179202
## ga_meth 16.904 4.616 3.662 0.000363 ***
## sexM 171.134 68.200 2.509 0.013325 *
## awar_nr -115.839 36.661 -3.160 0.001965 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 396 on 130 degrees of freedom
## (11 observations deleted due to missingness)
## Multiple R-squared: 0.4316, Adjusted R-squared: 0.3923
## F-statistic: 10.97 on 9 and 130 DF, p-value: 1.37e-12
# n = 140, 11 observations deleted due to missingness.
# How many samples in this model?
nrow(model.frame(model_awar_nr)) # 140, out of 151 (11 NA observations deleted).
## [1] 140
model_achronic <- lm(bwgt ~ bmi + age + pcsec + palco + parous + cohort + ga_meth + sex +
achronic, data = dfm)
summary(model_achronic)
##
## Call:
## lm(formula = bwgt ~ bmi + age + pcsec + palco + parous + cohort +
## ga_meth + sex + achronic, data = dfm)
##
## Residuals:
## Min 1Q Median 3Q Max
## -940.26 -275.79 -17.07 243.81 1285.39
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3197.809 1330.971 -2.403 0.017691 *
## bmi 26.901 10.068 2.672 0.008505 **
## age 18.231 8.251 2.209 0.028891 *
## pcsec 176.901 89.129 1.985 0.049276 *
## palco -92.168 103.518 -0.890 0.374921
## parous 50.092 107.090 0.468 0.640744
## cohortSV -75.666 106.061 -0.713 0.476863
## ga_meth 18.173 4.767 3.812 0.000212 ***
## sexM 153.429 70.499 2.176 0.031338 *
## achronic -6.154 11.350 -0.542 0.588634
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 410.5 on 130 degrees of freedom
## (11 observations deleted due to missingness)
## Multiple R-squared: 0.3893, Adjusted R-squared: 0.3471
## F-statistic: 9.209 on 9 and 130 DF, p-value: 1.026e-10
Are any of the top hits correlated with birthweight in a multivariate analysis?
cpg_model_mom_gtsum <- lapply(dfm_sig_gtsum[grep("cg",colnames(dfm_sig_gtsum))], function(x){
summary(lm(bwgt ~ Age + bmi + parous + pcsec + palco + cohort + PC1_cells + ga_meth + sex +
x,
data = dfm_sig_gtsum))
})
cpg_model_mom_setot <- lapply(dfm_sig_setot[grep("cg",colnames(dfm_sig_setot))], function(x){
summary(lm(bwgt ~ Age + bmi + parous + pcsec + palco + cohort + PC1_cells + ga_meth + sex +
x,
data = dfm_sig_setot))
})
cpg_model_mom_awar_nr <- lapply(dfm_sig_awar_nr[grep("cg",colnames(dfm_sig_awar_nr))], function(x){
summary(lm(bwgt ~ Age + bmi + parous + pcsec + palco + cohort + PC1_cells + ga_meth + sex +
x,
data = dfm_sig_awar_nr))
})
# babies
cpg_model_baby_gtsum <- lapply(dfb_sig_gtsum[grep("cg",colnames(dfb_sig_gtsum))], function(x){
summary(lm(bwgt ~ age + bmi + parous + pcsec + palco + cohort + PC1_cells + PC2_cells + sex + ga_meth +
x,
data = dfb_sig_gtsum))
})
cpg_model_baby_setot <- lapply(dfb_sig_setot[grep("cg",colnames(dfb_sig_setot))], function(x){
summary(lm(bwgt ~ age + bmi + parous + pcsec + palco + cohort + PC1_cells + PC2_cells + sex + ga_meth +
x,
data = dfb_sig_setot))
})
cpg_model_baby_awar_nr <- lapply(dfb_sig_awar_nr[grep("cg",colnames(dfb_sig_awar_nr))], function(x){
summary(lm(bwgt ~ age + bmi + parous + pcsec + palco + cohort + PC1_cells + PC2_cells + sex + ga_meth +
x,
data = dfb_sig_awar_nr))
})
nrow(model.frame(lm(bwgt ~ age + bmi + parous + pcsec + palco + cohort + PC1_cells + PC2_cells + sex + ga_meth +
cg08985979,
data = dfb_sig_awar_nr)))
## [1] 144
# 144 participants included, 7 observations deleted due to missingness.
cg08985979_model <- check_model(lm(bwgt ~ age + bmi + parous + pcsec +
palco + cohort + PC1_cells + PC2_cells + sex + ga_meth +
cg08985979,
data = dfb_sig_awar_nr))
cg08985979_pred <- prediction(lm(bwgt ~ age + bmi + parous + pcsec +
palco + cohort + PC1_cells + PC2_cells + sex + ga_meth +
cg08985979,
data = dfb_sig_awar_nr))
cg08985979_pred <-
ggplot(cg08985979_pred, aes(x = cg08985979, y = fitted)) +
geom_point(shape = 20, alpha = 2/3) +
geom_smooth(method = "lm", se = TRUE, color = "red", fill = "salmon") +
theme_bw() +
ylab("Adjusted birthweight (grams)") +
xlab("Methylation at cg08985979") +
ggtitle("Birthweight is predicted by DNAm at cg08985979 among babies",
subtitle = "beta = -4770.2, p = 0.03")
# Get top 1000 significant sites for each analyses. Check for overlap
# across the four measures within generation and then check for overlap
# across generation within each of the four stress measures. Use
# Venn Diagrams.
top1k_mom_gtsum <- mom_gtsum_man2 %>%
select(probe,pval) %>%
arrange(pval) %>%
slice_head(n = 1000)
top1k_mom_setot <- mom_setot_man2 %>%
select(probe,pval) %>%
arrange(pval) %>%
slice_head(n = 1000)
top1k_mom_awar_nr <- mom_awar_nr_man2 %>%
select(probe,pval) %>%
arrange(pval) %>%
slice_head(n = 1000)
top1k_mom_achronic <- mom_achronic_man2 %>%
select(probe,pval) %>%
arrange(pval) %>%
slice_head(n = 1000)
##########################
# Babies top 1000 hits
#########################
top1k_baby_gtsum <- baby_gtsum_man2 %>%
select(probe,pval) %>%
arrange(pval) %>%
slice_head(n = 1000)
top1k_baby_setot <- baby_setot_man2 %>%
select(probe,pval) %>%
arrange(pval) %>%
slice_head(n = 1000)
top1k_baby_awar_nr <- baby_awar_nr_man2 %>%
select(probe,pval) %>%
arrange(pval) %>%
slice_head(n = 1000)
top1k_baby_achronic <- baby_achronic_man2 %>%
select(probe,pval) %>%
arrange(pval) %>%
slice_head(n = 1000)
# Within generation across all four stress measures
mom_within <- list(gen_trauma = top1k_mom_gtsum$probe,
sexual_events = top1k_mom_setot$probe,
war_stress = top1k_mom_awar_nr$probe,
chronic_stress = top1k_mom_achronic$probe)
mom_within_venn <- ggVennDiagram(mom_within) +
labs(title = "Mothers top 1k hits") +
scale_x_continuous(expand = expansion(mult = .2))
baby_within <- list(gen_trauma = top1k_baby_gtsum$probe,
sexual_events = top1k_baby_setot$probe,
war_stress = top1k_baby_awar_nr$probe,
chronic_stress = top1k_baby_achronic$probe)
baby_within_venn <- ggVennDiagram(baby_within) +
labs(title = "Babies top 1k hits") +
scale_x_continuous(expand = expansion(mult = .2))
within_stress_venn <- ggarrange(mom_within_venn,baby_within_venn, nrow = 2, common.legend = TRUE)
# Across generation for each stress measure
gtsum_across <- list(mothers = top1k_mom_gtsum$probe,
babies = top1k_baby_gtsum$probe)
gtsum_across_venn <- ggVennDiagram(gtsum_across) +
labs(title = "General Trauma - Mother infant dyads")
setot_across <- list(mothers = top1k_mom_setot$probe,
babies = top1k_baby_setot$probe)
setot_across_venn <- ggVennDiagram(setot_across) +
labs(title = "Sexual Events - Mother infant dyads")
awar_nr_across <- list(mothers = top1k_mom_awar_nr$probe,
babies = top1k_baby_awar_nr$probe)
awar_nr_across_venn <- ggVennDiagram(awar_nr_across) +
labs(title = "General Trauma - Mother infant dyads")
achronic_across <- list(mothers = top1k_mom_achronic$probe,
babies = top1k_baby_achronic$probe)
achronic_across_venn <- ggVennDiagram(achronic_across) +
labs(title = "General Trauma - Mother infant dyads")
between_stress_venn <- ggarrange(gtsum_across_venn,setot_across_venn,
awar_nr_across_venn,achronic_across_venn,
ncol = 2,
nrow = 2, common.legend = TRUE)
Prepare files to upload to the eFORGE and eFORGE-TF websites for analysis. These sites can be accessed through links in this paper, which describes the two tools.
# Get all sites with top 125 hits for each EWAS and then
# write them to text files to be uploaded to the
# eFORGE and eFORGE-TF websites. The author recommends using larger
# 125-1250 probes and separating hypo and hyper methylation probes.
# Mothers
## General Trauma
# top 125 - hyper and hypo methylated combined
mom_gtsum_top_125 <- mom_gtsum_man2 %>%
slice_min(order_by = pval, n = 125) %>%
pull(probe)
write.table(mom_gtsum_top_125,
file = here("output","mothers_gtsum_top_125_eforge.txt"),
row.names = FALSE,
col.names = FALSE,
quote = FALSE)
# top 125 - hyper methylated only
mom_gtsum_top_125_hyper <- mom_gtsum_man2 %>%
filter(coef > 0) %>%
slice_min(order_by = pval, n = 125) %>%
pull(probe)
write.table(mom_gtsum_top_125_hyper,
file = here("output","mothers_gtsum_top_125_hyper_eforge.txt"),
row.names = FALSE,
col.names = FALSE,
quote = FALSE)
# top 125 - hypo methylated only
mom_gtsum_top_125_hypo <- mom_gtsum_man2 %>%
filter(coef < 0) %>%
slice_min(order_by = pval, n = 125) %>%
pull(probe)
write.table(mom_gtsum_top_125_hypo,
file = here("output","mothers_gtsum_top_125_hypo_eforge.txt"),
row.names = FALSE,
col.names = FALSE,
quote = FALSE)
## Sexual Events
# top 125 - hyper and hypo methylated combined
mom_setot_top_125 <- mom_setot_man2 %>%
slice_min(order_by = pval, n = 125) %>%
pull(probe)
write.table(mom_setot_top_125,
file = here("output","mothers_setot_top_125_eforge.txt"),
row.names = FALSE,
col.names = FALSE,
quote = FALSE)
# top 125 - hyper methylated only
mom_setot_top_125_hyper <- mom_setot_man2 %>%
filter(coef > 0) %>%
slice_min(order_by = pval, n = 125) %>%
pull(probe)
write.table(mom_setot_top_125_hyper,
file = here("output","mothers_setot_top_125_hyper_eforge.txt"),
row.names = FALSE,
col.names = FALSE,
quote = FALSE)
# top 125 - hypo methylated only
mom_setot_top_125_hypo <- mom_setot_man2 %>%
filter(coef < 0) %>%
slice_min(order_by = pval, n = 125) %>%
pull(probe)
write.table(mom_setot_top_125_hypo,
file = here("output","mothers_setot_top_125_hypo_eforge.txt"),
row.names = FALSE,
col.names = FALSE,
quote = FALSE)
## War Stress
# top 125 - hyper and hypo methylated combined
mom_awar_nr_top_125 <- mom_awar_nr_man2 %>%
slice_min(order_by = pval, n = 125) %>%
pull(probe)
write.table(mom_awar_nr_top_125,
file = here("output","mothers_awar_nr_top_125_eforge.txt"),
row.names = FALSE,
col.names = FALSE,
quote = FALSE)
# top 125 - hyper methylated only
mom_awar_nr_top_125_hyper <- mom_awar_nr_man2 %>%
filter(coef > 0) %>%
slice_min(order_by = pval, n = 125) %>%
pull(probe)
write.table(mom_awar_nr_top_125_hyper,
file = here("output","mothers_awar_nr_top_125_hyper_eforge.txt"),
row.names = FALSE,
col.names = FALSE,
quote = FALSE)
# top 125 - hypo methylated only
mom_awar_nr_top_125_hypo <- mom_awar_nr_man2 %>%
filter(coef < 0) %>%
slice_min(order_by = pval, n = 125) %>%
pull(probe)
write.table(mom_awar_nr_top_125_hypo,
file = here("output","mothers_awar_nr_top_125_hypo_eforge.txt"),
row.names = FALSE,
col.names = FALSE,
quote = FALSE)
# Babies
## General Trauma
# top 125 - hyper and hypo methylated combined
baby_gtsum_top_125 <- baby_gtsum_man2 %>%
slice_min(order_by = pval, n = 125) %>%
pull(probe)
write.table(baby_gtsum_top_125,
file = here("output","babies_gtsum_top_125_eforge.txt"),
row.names = FALSE,
col.names = FALSE,
quote = FALSE)
# top 125 - hyper methylated only
baby_gtsum_top_125_hyper <- baby_gtsum_man2 %>%
filter(coef > 0) %>%
slice_min(order_by = pval, n = 125) %>%
pull(probe)
write.table(baby_gtsum_top_125_hyper,
file = here("output","babies_gtsum_top_125_hyper_eforge.txt"),
row.names = FALSE,
col.names = FALSE,
quote = FALSE)
# top 125 - hypo methylated only
baby_gtsum_top_125_hypo <- baby_gtsum_man2 %>%
filter(coef < 0) %>%
slice_min(order_by = pval, n = 125) %>%
pull(probe)
write.table(baby_gtsum_top_125_hypo,
file = here("output","babies_gtsum_top_125_hypo_eforge.txt"),
row.names = FALSE,
col.names = FALSE,
quote = FALSE)
## Sexual Events
# top 125 - hyper and hypo methylated combined
baby_setot_top_125 <- baby_setot_man2 %>%
slice_min(order_by = pval, n = 125) %>%
pull(probe)
write.table(baby_setot_top_125,
file = here("output","babies_setot_top_125_eforge.txt"),
row.names = FALSE,
col.names = FALSE,
quote = FALSE)
# top 125 - hyper methylated only
baby_setot_top_125_hyper <- baby_setot_man2 %>%
filter(coef > 0) %>%
slice_min(order_by = pval, n = 125) %>%
pull(probe)
write.table(baby_setot_top_125_hyper,
file = here("output","babies_setot_top_125_hyper_eforge.txt"),
row.names = FALSE,
col.names = FALSE,
quote = FALSE)
# top 125 - hypo methylated only
baby_setot_top_125_hypo <- baby_setot_man2 %>%
filter(coef < 0) %>%
slice_min(order_by = pval, n = 125) %>%
pull(probe)
write.table(baby_setot_top_125_hypo,
file = here("output","babies_setot_top_125_hypo_eforge.txt"),
row.names = FALSE,
col.names = FALSE,
quote = FALSE)
## War Stress
# top 125 - hyper and hypo methylated combined
baby_awar_nr_top_125 <- baby_awar_nr_man2 %>%
slice_min(order_by = pval, n = 125) %>%
pull(probe)
write.table(baby_awar_nr_top_125,
file = here("output","babies_awar_nr_top_125_eforge.txt"),
row.names = FALSE,
col.names = FALSE,
quote = FALSE)
# top 125 - hyper methylated only
baby_awar_nr_top_125_hyper <- baby_awar_nr_man2 %>%
filter(coef > 0) %>%
slice_min(order_by = pval, n = 125) %>%
pull(probe)
write.table(baby_awar_nr_top_125_hyper,
file = here("output","babies_awar_nr_top_125_hyper_eforge.txt"),
row.names = FALSE,
col.names = FALSE,
quote = FALSE)
# top 125 - hypo methylated only
baby_awar_nr_top_125_hypo <- baby_awar_nr_man2 %>%
filter(coef < 0) %>%
slice_min(order_by = pval, n = 125) %>%
pull(probe)
write.table(baby_awar_nr_top_125_hypo,
file = here("output","babies_awar_nr_top_125_hypo_eforge.txt"),
row.names = FALSE,
col.names = FALSE,
quote = FALSE)
Now read in results from the eFORGE website using the text files generated in the chunk above and make a heatmap of results for newborns only.
# Create list of text files
txt_files_ls = list.files(path=here("data"), pattern="*.gz")
# Read the files in, assuming tab separator
txt_files_df <- lapply(txt_files_ls, function(x) {read.table(file = here("data",x),
header = TRUE,
sep ="\t")})
# Combine them
combined_df <- do.call("rbind", lapply(txt_files_df, as.data.frame))
edf <- combined_df %>%
mutate(exposure = c(rep(c("War Trauma"),39*3),
rep(c("General Trauma"), 39*3),
rep(c("Sexual Trauma"),39*3))) %>%
mutate(analysis = c(rep(c("Top 125 Hyper"),39),
rep(c("Top 125 Hypo"),39),
rep(c("Top 125"),39),
rep(c("Top 125 Hyper"),39),
rep(c("Top 125 Hypo"),39),
rep(c("Top 125"),39),
rep(c("Top 125 Hyper"),39),
rep(c("Top 125 Hypo"),39),
rep(c("Top 125"),39))) %>%
mutate(exposure_analysis = paste(exposure,analysis,sep = " - "))
edf$Tissue2 <- fct_collapse(edf$Tissue,
Embryonic = c("ES Cell"),
iPS = c("IPS cell"),
Fetal = c("Feta Intestine Small",
"Fetal Adrenal Gland",
"Fetal Brain",
"Fetal Heart",
"Fetal Intestine Large",
"Fetal Kidney",
"Fetal Lung",
"Fetal Muscle Leg",
"Fetal Muscle Trunk",
"Fetal Stomach",
"Fetal Thymus"),
Other = c("Blood","Breast","Gastric",
"Lung","Ovary","Pancreas",
"Placenta","Psoas Muscle",
"Skin","Small Intestine"))
# order the tissue2 column
edf$Tissue2 <- factor(edf$Tissue2,
levels = c("Embryonic","iPS","Fetal","Other"),
ordered = TRUE)
edf2 <- edf %>%
slice_head(n=39) %>%
arrange(Tissue2)
# Get Qvalues into cells of a matrix with tissue type in rownames
# and exposure_analysis in colnames.
qmat <- matrix(edf$Qvalue, nrow = 39, ncol = 9,
byrow = FALSE)
rownames(qmat) <- edf$Cell[1:39]
colnames(qmat) <- unique(edf$exposure_analysis)
# Now rearrange the columns so that general trauma
# are the first three columns, then sexual events,
# then war trauma
qmat1 <- qmat[,4:9]
qmat2 <- qmat[,1:3]
qmat3 <- cbind(qmat1,qmat2)
# rearrange rows to match the Tissue2 vector in edf2.
linker <- match(edf2$Cell,rownames(qmat3))
#
qmat4 <- qmat3[linker,]
#rownames(qmat4) <- edf2$Cell
colnames(qmat4) <- rep(c("Top 125 Hyper","Top 125 Hypo","Top 125"),3)
col_fun = colorRamp2(c(0, 1), c("red","blue"))
# Make row annotation:
RowAnn <- data.frame(edf2$Tissue2)
colnames(RowAnn) <- c("Tissue")
colours2 <- list("Tissue" = c(
"Other"="green",
"Embryonic"="darkgreen",
"Fetal" = "darkseagreen",
"iPS" = "darkolivegreen2"))
RowAnn <- HeatmapAnnotation(df=RowAnn, col=colours2, which="row")
tissue_matrix <- matrix(edf2$Tissue2[1:39],ncol = 1)
colnames(tissue_matrix) <- "Tissue"
rownames(tissue_matrix) <- rownames(qmat4)
hmap <- Heatmap(qmat4,
name = "q value",
col = col_fun,
show_column_names = TRUE,
cluster_rows = FALSE,
cluster_columns = FALSE,
cell_fun = function(j, i, x, y, w, h, fill) {
if (qmat4[i, j] < 0.001) {
grid.text("***", x, y)
} else if (qmat4[i, j] < 0.01) {
grid.text("**", x, y)
} else if (qmat4[i, j] < 0.05) {
grid.text("*", x, y)
}
},
column_split = rep(1:3, each = 3),
width = unit(100,"mm"),
heatmap_height = unit(180,"mm"),
left_annotation = RowAnn,
use_raster = TRUE,
raster_quality = 5,
top_annotation = HeatmapAnnotation(foo = anno_block(gp = gpar(fill = 5:7),
labels = c("general trauma", "sexual trauma", "war trauma"))))
draw(hmap, heatmap_legend_side="left",
annotation_legend_side="left",
merge_legend = TRUE)
Pick up analyses with the mage4 object created above.
Also drop samples marked for dropping at the top of the script due to sample problems like duplicates and siblings in the data set.
# Horvath recommends removing samples with corSampleVSgoldstandard < 0.8.
# This "gs" object created above contains the only sample failing this check.
# The rest of the samples to remove will be based on the Meffil
# quality control, which is more robust than
# the epigenetic clock sex checks, for example. Summarized in this object:
fail_qc
## [1] "C006bb.1.2." "C5bb.1.2" "C6.bb.2.1a" "C91bb.1.2" "SV001b.b.1.2"
## [6] "SV014b.b.1.1" "C084M" "C086M" "SV018M.1.1" "SV043M.2.1a"
gs %in% fail_qc # TRUE.
## [1] TRUE
################################################################
# DROP THE SIBLINGS AND DUPLICATE SAMPLES in the dropper object
################################################################
droppers
## [1] "C91bb.1.2" "C102M.2.1." "C102bb.2.1."
## [4] "C091M.1.1." "C091bb.2.1." "C103bb.2.1."
## [7] "C103M.1.1." "C101M.1.1" "C101bb.1.1"
## [10] "C76bb.1.1" "C76M.1.2" "SV058M.1.1 - eq101"
## [13] "SV069M.1.1" "SV069b.b.1.1 - eq100" "SV058b.b.1.1"
## [16] "SV043M.2.1a"
mom_mage <- mage3_mother %>%
filter(!methylation_id %in% fail_qc) %>% # Lose 4 samples to QC
filter(!methylation_id %in% droppers) %>% # Lose 7 samples because of duplicate moms.
mutate(bmi = mwgt/((mhgt/100)^2)) %>%
rename(parous = is_this_your_first_child) %>%
# This line brings in the cell type principal components
left_join(mom_pr_cells, by = c("methylation_id" = "methylation_id"))
baby_mage <- mage3_baby %>%
filter(!methylation_id %in% fail_qc) %>% # 4 samples lost to QC
filter(!methylation_id %in% droppers) %>% # Lose 7 samples because of sibling samples.
mutate(bmi = mwgt/((mhgt/100)^2)) %>%
rename(parous = is_this_your_first_child) %>%
# This line brings in the cell type principal components
left_join(baby_pr_cells, by = c("methylation_id" = "methylation_id"))
# What are the differences between the EWAS analytic sample
# and the epigenetic age analytic sample?
setdiff(mom_mage$methylation_id,dfm$methylation_id)
## [1] "C25M1.1" "C49M.1.2_400" "C7M.1.1_clean" "C2M.1.1_250"
setdiff(baby_mage$methylation_id,dfb$methylation_id)
## [1] "C1bb.1.1" "C25b.b.1.1" "C34bb.1.1_clean" "C78bb.1.1_250"
# These differences are accounted for by the fact that
# we dropped some of the small batches for the EWAS
# analyses to enable batch correction using ComBat. Some
# of the batches were so small they were preventing batch
# correction. This isn't a recognized problem for
# epigenetic age analyses.
dim(baby_mage) # 155 babies
## [1] 155 161
dim(mom_mage) # 155 mothers
## [1] 155 160
# create the final wide dataset
mage_wide <- baby_mage %>%
full_join(mom_mage, by = c("dyad" = "dyad"), suffix = c("_baby","_mother"))
# 160 different dyads in the wide data set.
Test for associations between stress measures and age-appropriate DNA methylation clocks. Use Horvath, IEAA, and EEAA for babies. Use the same three plus DNAmTLAdjAge, GrimAge and PhenoAge for mothers. Also do a birthweight test.
mom_eaa_cols <- c("AgeAccelerationResidual",
"IEAA","EEAA","DNAmTLAdjAge",
"AgeAccelPheno","AgeAccelGrim")
mom_gtsum_eaa <- apply(mom_mage[mom_eaa_cols], 2, function(x){
res <- tidy(lm(x ~ bmi + pcsec + palco + parous + cohort + gtsum, data = mom_mage),
conf.int = TRUE)
res_cell <- tidy(lm(x ~ bmi + pcsec + palco + parous + cohort + PC1_cells + gtsum, data = mom_mage),
conf.int = TRUE)
return(list(res,res_cell))
})
mom_gtsum_eaa <- lapply(mom_gtsum_eaa,getValues)
mom_gtsum_eaa <- do.call(rbind,mom_gtsum_eaa)
mom_setot_eaa <- apply(mom_mage[mom_eaa_cols], 2, function(x){
res <- tidy(lm(x ~ bmi + pcsec + palco + parous + cohort + setot, data = mom_mage),
conf.int = TRUE)
res_cell <- tidy(lm(x ~ bmi + pcsec + palco + parous + cohort + PC1_cells + setot, data = mom_mage),
conf.int = TRUE)
return(list(res,res_cell))
})
mom_setot_eaa <- lapply(mom_setot_eaa,getValues)
mom_setot_eaa <- do.call(rbind,mom_setot_eaa)
mom_awar_nr_eaa <- apply(mom_mage[mom_eaa_cols], 2, function(x){
res <- tidy(lm(x ~ bmi + pcsec + palco + parous + cohort + awar_nr, data = mom_mage),
conf.int = TRUE)
res_cell <- tidy(lm(x ~ bmi + pcsec + palco + parous + cohort + PC1_cells + awar_nr, data = mom_mage),
conf.int = TRUE)
return(list(res,res_cell))
})
mom_awar_nr_eaa <- lapply(mom_awar_nr_eaa,getValues)
mom_awar_nr_eaa <- do.call(rbind,mom_awar_nr_eaa)
mom_achronic_eaa <- apply(mom_mage[mom_eaa_cols], 2, function(x){
res <- tidy(lm(x ~ bmi + pcsec + palco + parous + cohort + achronic, data = mom_mage),
conf.int = TRUE)
res_cell <- tidy(lm(x ~ bmi + pcsec + palco + parous + cohort + PC1_cells + achronic, data = mom_mage),
conf.int = TRUE)
return(list(res,res_cell))
})
mom_achronic_eaa <- lapply(mom_achronic_eaa,getValues)
mom_achronic_eaa <- do.call(rbind,mom_achronic_eaa)
baby_eaa_cols <- c("AgeAccelerationResidual",
"IEAA","EEAA")
baby_gtsum_eaa <- apply(baby_mage[baby_eaa_cols], 2, function(x){
res <- tidy(lm(x ~ bmi + pcsec + palco + parous + cohort +
sex + ga_meth + gtsum, data = baby_mage),
conf.int = TRUE)
res_cell <- tidy(lm(x ~ bmi + pcsec + palco + parous + cohort +
sex + ga_meth + PC1_cells + PC2_cells + gtsum, data = baby_mage),
conf.int = TRUE)
return(list(res,res_cell))
})
baby_gtsum_eaa <- lapply(baby_gtsum_eaa,getValues)
baby_gtsum_eaa <- do.call(rbind,baby_gtsum_eaa)
baby_setot_eaa <- apply(baby_mage[baby_eaa_cols], 2, function(x){
res <- tidy(lm(x ~ bmi + pcsec + palco + parous + cohort +
sex + ga_meth + setot, data = baby_mage),
conf.int = TRUE)
res_cell <- tidy(lm(x ~ bmi + pcsec + palco + parous + cohort +
sex + ga_meth + PC1_cells + PC2_cells + setot, data = baby_mage),
conf.int = TRUE)
return(list(res,res_cell))
})
baby_setot_eaa <- lapply(baby_setot_eaa,getValues)
baby_setot_eaa <- do.call(rbind,baby_setot_eaa)
baby_awar_nr_eaa <- apply(baby_mage[baby_eaa_cols], 2, function(x){
res <- tidy(lm(x ~ bmi + pcsec + palco + parous + cohort +
sex + ga_meth + awar_nr, data = baby_mage),
conf.int = TRUE)
res_cell <- tidy(lm(x ~ bmi + pcsec + palco + parous + cohort +
sex + ga_meth + PC1_cells + PC2_cells + awar_nr, data = baby_mage),
conf.int = TRUE)
return(list(res,res_cell))
})
baby_awar_nr_eaa <- lapply(baby_awar_nr_eaa,getValues)
baby_awar_nr_eaa <- do.call(rbind,baby_awar_nr_eaa)
baby_achronic_eaa <- apply(baby_mage[baby_eaa_cols], 2, function(x){
res <- tidy(lm(x ~ bmi + pcsec + palco + parous + cohort +
sex + ga_meth + achronic, data = baby_mage),
conf.int = TRUE)
res_cell <- tidy(lm(x ~ bmi + pcsec + palco + parous + cohort +
sex + ga_meth + PC1_cells + PC2_cells + achronic, data = baby_mage),
conf.int = TRUE)
return(list(res,res_cell))
})
baby_achronic_eaa <- lapply(baby_achronic_eaa,getValues)
baby_achronic_eaa <- do.call(rbind,baby_achronic_eaa)
Heatmap of results.
mom_gtsum_eaa <- mom_gtsum_eaa %>%
select(coef,pval) %>%
mutate(clock = c("Horvath","Horvath_cell",
"IEAA","IEAA_cell",
"EEAA","EEAA_cell",
"Telomere","Telomere_cell",
"PhenoAge","PhenoAge_cell",
"GrimAge","GrimAge_cell")) %>%
mutate(exposure = c("general trauma"))
mom_setot_eaa <- mom_setot_eaa %>%
select(coef,pval) %>%
mutate(clock = c("Horvath","Horvath_cell",
"IEAA","IEAA_cell",
"EEAA","EEAA_cell",
"Telomere","Telomere_cell",
"PhenoAge","PhenoAge_cell",
"GrimAge","GrimAge_cell")) %>%
mutate(exposure = c("sexual events"))
mom_awar_nr_eaa <- mom_awar_nr_eaa %>%
select(coef,pval) %>%
mutate(clock = c("Horvath","Horvath_cell",
"IEAA","IEAA_cell",
"EEAA","EEAA_cell",
"Telomere","Telomere_cell",
"PhenoAge","PhenoAge_cell",
"GrimAge","GrimAge_cell")) %>%
mutate(exposure = c("war stress"))
mom_achronic_eaa <- mom_achronic_eaa %>%
select(coef,pval) %>%
mutate(clock = c("Horvath","Horvath_cell",
"IEAA","IEAA_cell",
"EEAA","EEAA_cell",
"Telomere","Telomere_cell",
"PhenoAge","PhenoAge_cell",
"GrimAge","GrimAge_cell")) %>%
mutate(exposure = c("chronic stress"))
combo_mom <- rbind(mom_gtsum_eaa,mom_setot_eaa,mom_awar_nr_eaa,mom_achronic_eaa)
# Get a matrix of the coefficient values
mom_mat <- matrix(data = combo_mom$coef, byrow = FALSE, ncol = 4,nrow = 12)
rownames(mom_mat)<- c("Horvath","Horvath_cell",
"IEAA","IEAA_cell",
"EEAA","EEAA_cell",
"Telomere","Telomere_cell",
"PhenoAge","PhenoAge_cell",
"GrimAge","GrimAge_cell")
colnames(mom_mat) <- c("general trauma","sexual trauma",
"war trauma", "chronic stress")
# Subset for the tests we want:
mom_mat <- mom_mat[-(grep("_cell",rownames(mom_mat))),]
# Get a matrix of the p values
pval_mom <- matrix(data = combo_mom$pval, byrow = FALSE, ncol = 4,nrow = 12)
rownames(pval_mom)<- c("Horvath","Horvath_cell",
"IEAA","IEAA_cell",
"EEAA","EEAA_cell",
"Telomere","Telomere_cell",
"PhenoAge","PhenoAge_cell",
"GrimAge","GrimAge_cell")
colnames(pval_mom) <- c("general trauma","sexual trauma",
"war trauma", "chronic stress")
# Subset for the tests we want:
pval_mom <- pval_mom[-(grep("_cell",rownames(pval_mom))),]
print(mom_mat)
## general trauma sexual trauma war trauma chronic stress
## Horvath 0.218788722 0.17113376 -0.2343213 -0.165975588
## IEAA 0.185559088 0.12639270 -0.1093203 -0.143285166
## EEAA 0.275574695 1.08649268 -0.1569762 0.207576419
## Telomere -0.009541149 -0.02150548 0.0021936 -0.001063849
## PhenoAge 0.555257195 1.36186379 -0.5609807 0.185488241
## GrimAge 0.208747196 0.42459712 -0.1589974 0.064978777
print(pval_mom)
## general trauma sexual trauma war trauma chronic stress
## Horvath 0.4968620 0.627564080 0.7000867 0.3492283
## IEAA 0.5228680 0.691159119 0.8420720 0.3702547
## EEAA 0.4286302 0.003864255 0.8114353 0.2787733
## Telomere 0.2730758 0.023092653 0.8940465 0.8248359
## PhenoAge 0.3244419 0.026238341 0.5984409 0.5506848
## GrimAge 0.1910909 0.014408464 0.5988246 0.4611069
col_fun = colorRamp2(c(-2, 0, 2), c("blue", "white", "red"))
mom_eaa_heatmap <- Heatmap(mom_mat,
name = "beta",
col = col_fun,
column_title = "Maternal Stress and Epigenetic Age Acceleration",
row_title = "Mothers",
cluster_rows = FALSE,
cluster_columns = FALSE,
column_names_rot = 0,
column_names_centered = TRUE,
cell_fun = function(j, i, x, y, w, h, fill) {
if (pval_mom[i, j] < 0.001) {
grid.text("***", x, y)
} else if (pval_mom[i, j] < 0.01) {
grid.text("**", x, y)
} else if (pval_mom[i, j] < 0.05) {
grid.text("*", x, y)
}
},
row_gap = unit(0, "mm"), border = TRUE)
baby_gtsum_eaa <- baby_gtsum_eaa %>%
select(coef,pval) %>%
mutate(clock = c("Horvath","Horvath_cell",
"IEAA","IEAA_cell",
"EEAA","EEAA_cell")) %>%
mutate(exposure = c("general trauma"))
baby_setot_eaa <- baby_setot_eaa %>%
select(coef,pval) %>%
mutate(clock = c("Horvath","Horvath_cell",
"IEAA","IEAA_cell",
"EEAA","EEAA_cell")) %>%
mutate(exposure = c("sexual events"))
baby_awar_nr_eaa <- baby_awar_nr_eaa %>%
select(coef,pval) %>%
mutate(clock = c("Horvath","Horvath_cell",
"IEAA","IEAA_cell",
"EEAA","EEAA_cell")) %>%
mutate(exposure = c("war stress"))
baby_achronic_eaa <- baby_achronic_eaa %>%
select(coef,pval) %>%
mutate(clock = c("Horvath","Horvath_cell",
"IEAA","IEAA_cell",
"EEAA","EEAA_cell")) %>%
mutate(exposure = c("chronic stress"))
combo_baby <- rbind(baby_gtsum_eaa,baby_setot_eaa,baby_awar_nr_eaa,baby_achronic_eaa)
# Get a matrix of the coefficient values
baby_mat <- matrix(data = combo_baby$coef, byrow = FALSE, ncol = 4,nrow = 6)
rownames(baby_mat)<- c("Horvath","Horvath_cell",
"IEAA","IEAA_cell",
"EEAA","EEAA_cell")
colnames(baby_mat) <- c("general trauma","sexual trauma",
"war trauma", "chronic stress")
# Subset for the tests we want:
baby_mat <- baby_mat[-(grep("_cell",rownames(baby_mat))),]
# Get a matrix of the p values
pval_baby <- matrix(data = combo_baby$pval, byrow = FALSE, ncol = 4,nrow = 6)
rownames(pval_baby)<- c("Horvath","Horvath_cell",
"IEAA","IEAA_cell",
"EEAA","EEAA_cell")
colnames(pval_baby) <- c("general trauma","sexual trauma",
"war trauma", "chronic stress")
# Subset for the tests we want:
pval_baby <- pval_baby[-(grep("_cell",rownames(pval_baby))),]
print(baby_mat)
## general trauma sexual trauma war trauma chronic stress
## Horvath -0.01825929 -0.007533342 -0.02851414 -0.004013180
## IEAA -0.12706067 0.045669226 -0.17196381 0.006182543
## EEAA 0.69677656 0.297129153 1.12135175 0.072583576
print(pval_baby)
## general trauma sexual trauma war trauma chronic stress
## Horvath 0.21230177 0.6276206 0.28782919 0.6021742
## IEAA 0.14631021 0.6229336 0.28379519 0.8932359
## EEAA 0.02400518 0.3668638 0.04776305 0.6569516
col_fun = colorRamp2(c(-2, 0, 2), c("blue", "white", "red"))
baby_eaa_heatmap <- Heatmap(baby_mat,
name = "beta",
col = col_fun,
row_title = "Newborns",
cluster_rows = FALSE,
cluster_columns = FALSE,
column_names_rot = 0,
column_names_centered = TRUE,
cell_fun = function(j, i, x, y, w, h, fill) {
if (pval_baby[i, j] < 0.001) {
grid.text("***", x, y)
} else if (pval_baby[i, j] < 0.01) {
grid.text("**", x, y)
} else if (pval_baby[i, j] < 0.05) {
grid.text("*", x, y)
}
},
row_gap = unit(0, "mm"), border = TRUE)
Should be using beta regression for this since the range is 0-1. First mothers.
# There is a problem with the NK column because several people have
# a value indistinguishable from zero. Add 0.000000001 to each measurement
# just to make the function run.
dfm$NK <- dfm$NK + 1e-09
mom_cell_cols <- c("Bcell","CD4T","CD8T","Mono","Neu","NK")
mom_gtsum_cell <- apply(dfm[mom_cell_cols], 2, function(x){
res <- tidy(betareg(x ~ bmi + pcsec + palco + parous + cohort + age + ga_meth
+ gtsum,
data = dfm), conf.int = TRUE)
return(res)
})
mom_setot_cell <- apply(dfm[mom_cell_cols], 2, function(x){
res <- tidy(betareg(x ~ bmi + pcsec + palco + parous + cohort + age + ga_meth
+ setot,
data = dfm), conf.int = TRUE)
return(res)
})
mom_awar_nr_cell <- apply(dfm[mom_cell_cols], 2, function(x){
res <- tidy(betareg(x ~ bmi + pcsec + palco + parous + cohort + age + ga_meth
+ awar_nr,
data = dfm), conf.int = TRUE)
return(res)
})
mom_achronic_cell <- apply(dfm[mom_cell_cols], 2, function(x){
res <- tidy(betareg(x ~ bmi + pcsec + palco + parous + cohort + age + ga_meth
+ achronic,
data = dfm), conf.int = TRUE)
return(res)
})
# For reasons I will never understand, the code here breaks when
# it is inside of a function, but works outside of a function.
# Just write out everything.
coef <- sapply(mom_gtsum_cell, function (x) x[[length(x[[1]])-1,"estimate"]])
std_error <- sapply(mom_gtsum_cell, function (x) x[[length(x[[1]])-1,"std.error"]])
test_stat <- sapply(mom_gtsum_cell, function (x) x[[length(x[[1]])-1,"statistic"]])
pval <- sapply(mom_gtsum_cell, function (x) x[[length(x[[1]])-1,"p.value"]])
conf_low <- sapply(mom_gtsum_cell, function (x) x[[length(x[[1]])-1,"conf.low"]])
conf_high <- sapply(mom_gtsum_cell, function (x) x[[length(x[[1]])-1,"conf.high"]])
df_mom_gtsum <- cbind.data.frame(coef,std_error,test_stat,pval,conf_low,conf_high)
coef <- sapply(mom_setot_cell, function (x) x[[length(x[[1]])-1,"estimate"]])
std_error <- sapply(mom_setot_cell, function (x) x[[length(x[[1]])-1,"std.error"]])
test_stat <- sapply(mom_setot_cell, function (x) x[[length(x[[1]])-1,"statistic"]])
pval <- sapply(mom_setot_cell, function (x) x[[length(x[[1]])-1,"p.value"]])
conf_low <- sapply(mom_setot_cell, function (x) x[[length(x[[1]])-1,"conf.low"]])
conf_high <- sapply(mom_setot_cell, function (x) x[[length(x[[1]])-1,"conf.high"]])
df_mom_setot <- cbind.data.frame(coef,std_error,test_stat,pval,conf_low,conf_high)
coef <- sapply(mom_awar_nr_cell, function (x) x[[length(x[[1]])-1,"estimate"]])
std_error <- sapply(mom_awar_nr_cell, function (x) x[[length(x[[1]])-1,"std.error"]])
test_stat <- sapply(mom_awar_nr_cell, function (x) x[[length(x[[1]])-1,"statistic"]])
pval <- sapply(mom_awar_nr_cell, function (x) x[[length(x[[1]])-1,"p.value"]])
conf_low <- sapply(mom_awar_nr_cell, function (x) x[[length(x[[1]])-1,"conf.low"]])
conf_high <- sapply(mom_awar_nr_cell, function (x) x[[length(x[[1]])-1,"conf.high"]])
df_mom_awar_nr <- cbind.data.frame(coef,std_error,test_stat,pval,conf_low,conf_high)
coef <- sapply(mom_achronic_cell, function (x) x[[length(x[[1]])-1,"estimate"]])
std_error <- sapply(mom_achronic_cell, function (x) x[[length(x[[1]])-1,"std.error"]])
test_stat <- sapply(mom_achronic_cell, function (x) x[[length(x[[1]])-1,"statistic"]])
pval <- sapply(mom_achronic_cell, function (x) x[[length(x[[1]])-1,"p.value"]])
conf_low <- sapply(mom_achronic_cell, function (x) x[[length(x[[1]])-1,"conf.low"]])
conf_high <- sapply(mom_achronic_cell, function (x) x[[length(x[[1]])-1,"conf.high"]])
df_mom_achronic <- cbind.data.frame(coef,std_error,test_stat,pval,conf_low,conf_high)
df_mom_gtsum <- df_mom_gtsum %>%
select(coef,pval) %>%
mutate(cell_type = c("Bcell","CD4T","CD8T","Mono","Neu","NK")) %>%
mutate(exposure = c("general trauma"))
df_mom_setot <- df_mom_setot %>%
select(coef,pval) %>%
mutate(cell_type = c("Bcell","CD4T","CD8T","Mono","Neu","NK")) %>%
mutate(exposure = c("sexual events"))
df_mom_awar_nr <- df_mom_awar_nr %>%
select(coef,pval) %>%
mutate(cell_type = c("Bcell","CD4T","CD8T","Mono","Neu","NK")) %>%
mutate(exposure = c("war stress"))
df_mom_achronic <- df_mom_achronic %>%
select(coef,pval) %>%
mutate(cell_type = c("Bcell","CD4T","CD8T","Mono","Neu","NK")) %>%
mutate(exposure = c("chronic stress"))
mom_combo_cell <- rbind(df_mom_gtsum,df_mom_setot,df_mom_awar_nr,df_mom_achronic)
# Get a matrix of the coefficient values
mom_mat_cell <- matrix(data = mom_combo_cell$coef, byrow = FALSE, ncol = 4,nrow = 6)
rownames(mom_mat_cell)<- c("Bcell","CD4T","CD8T","Mono","Neu","NK")
colnames(mom_mat_cell) <- c("general trauma","sexual events",
"war trauma", "chronic stress")
# Get a matrix of the p values
mom_mat_cell_pval <- matrix(data = mom_combo_cell$pval, byrow = FALSE, ncol = 4,nrow = 6)
rownames(mom_mat_cell_pval)<- c("Bcell","CD4T","CD8T","Mono","Neu","NK")
colnames(mom_mat_cell_pval) <- c("general trauma","sexual events",
"war trauma", "chronic stress")
col_fun2 = colorRamp2(c(-0.1,0, 0.1), c("blue","white", "red"))
mom_cell_heatmap <- Heatmap(mom_mat_cell,
name = "beta",
col = col_fun2,
column_title = "Maternal Stress - Results in Mothers",
row_title = "Immune Cell types",
cluster_rows = FALSE,
cluster_columns = FALSE,
cell_fun = function(j, i, x, y, w, h, fill) {
if (mom_mat_cell_pval[i, j] < 0.001) {
grid.text("***", x, y)
} else if (mom_mat_cell_pval[i, j] < 0.01) {
grid.text("**", x, y)
} else if (mom_mat_cell_pval[i, j] < 0.05) {
grid.text("*", x, y)
}
},
row_gap = unit(0, "mm"), border = TRUE)
# There is a problem with the NK column because several people have
# a value indistinguishable from zero. Add 0.000000001 to each measurement
# just to make the function run.
dfb$Bcell <- dfb$Bcell + 1e-09
dfb$CD4T <- dfb$CD4T + 1e-09
dfb$CD8T <- dfb$CD8T + 1e-09
dfb$NK <- dfb$NK + 1e-09
baby_cell_cols <- c("Bcell","CD4T","CD8T","Gran","Mono","NK","nRBC")
baby_gtsum_cell <- apply(dfb[baby_cell_cols], 2, function(x){
res <- tidy(betareg(x ~ bmi + pcsec + palco + parous + cohort + age + ga_meth
+ sex + gtsum,
data = dfb), conf.int = TRUE)
return(res)
})
baby_setot_cell <- apply(dfb[baby_cell_cols], 2, function(x){
res <- tidy(betareg(x ~ bmi + pcsec + palco + parous + cohort + age + ga_meth
+ sex + setot,
data = dfb), conf.int = TRUE)
return(res)
})
baby_awar_nr_cell <- apply(dfb[baby_cell_cols], 2, function(x){
res <- tidy(betareg(x ~ bmi + pcsec + palco + parous + cohort + age + ga_meth
+ sex + awar_nr,
data = dfb), conf.int = TRUE)
return(res)
})
baby_achronic_cell <- apply(dfb[baby_cell_cols], 2, function(x){
res <- tidy(betareg(x ~ bmi + pcsec + palco + parous + cohort + age + ga_meth
+ sex + achronic,
data = dfb), conf.int = TRUE)
return(res)
})
# For reasons I will never understand, the code here breaks when
# it is inside of a function, but works outside of a function.
# Just write out everything.
coef <- sapply(baby_gtsum_cell, function (x) x[[length(x[[1]])-1,"estimate"]])
std_error <- sapply(baby_gtsum_cell, function (x) x[[length(x[[1]])-1,"std.error"]])
test_stat <- sapply(baby_gtsum_cell, function (x) x[[length(x[[1]])-1,"statistic"]])
pval <- sapply(baby_gtsum_cell, function (x) x[[length(x[[1]])-1,"p.value"]])
conf_low <- sapply(baby_gtsum_cell, function (x) x[[length(x[[1]])-1,"conf.low"]])
conf_high <- sapply(baby_gtsum_cell, function (x) x[[length(x[[1]])-1,"conf.high"]])
df_baby_gtsum <- cbind.data.frame(coef,std_error,test_stat,pval,conf_low,conf_high)
coef <- sapply(baby_setot_cell, function (x) x[[length(x[[1]])-1,"estimate"]])
std_error <- sapply(baby_setot_cell, function (x) x[[length(x[[1]])-1,"std.error"]])
test_stat <- sapply(baby_setot_cell, function (x) x[[length(x[[1]])-1,"statistic"]])
pval <- sapply(baby_setot_cell, function (x) x[[length(x[[1]])-1,"p.value"]])
conf_low <- sapply(baby_setot_cell, function (x) x[[length(x[[1]])-1,"conf.low"]])
conf_high <- sapply(baby_setot_cell, function (x) x[[length(x[[1]])-1,"conf.high"]])
df_baby_setot <- cbind.data.frame(coef,std_error,test_stat,pval,conf_low,conf_high)
coef <- sapply(baby_awar_nr_cell, function (x) x[[length(x[[1]])-1,"estimate"]])
std_error <- sapply(baby_awar_nr_cell, function (x) x[[length(x[[1]])-1,"std.error"]])
test_stat <- sapply(baby_awar_nr_cell, function (x) x[[length(x[[1]])-1,"statistic"]])
pval <- sapply(baby_awar_nr_cell, function (x) x[[length(x[[1]])-1,"p.value"]])
conf_low <- sapply(baby_awar_nr_cell, function (x) x[[length(x[[1]])-1,"conf.low"]])
conf_high <- sapply(baby_awar_nr_cell, function (x) x[[length(x[[1]])-1,"conf.high"]])
df_baby_awar_nr <- cbind.data.frame(coef,std_error,test_stat,pval,conf_low,conf_high)
coef <- sapply(baby_achronic_cell, function (x) x[[length(x[[1]])-1,"estimate"]])
std_error <- sapply(baby_achronic_cell, function (x) x[[length(x[[1]])-1,"std.error"]])
test_stat <- sapply(baby_achronic_cell, function (x) x[[length(x[[1]])-1,"statistic"]])
pval <- sapply(baby_achronic_cell, function (x) x[[length(x[[1]])-1,"p.value"]])
conf_low <- sapply(baby_achronic_cell, function (x) x[[length(x[[1]])-1,"conf.low"]])
conf_high <- sapply(baby_achronic_cell, function (x) x[[length(x[[1]])-1,"conf.high"]])
df_baby_achronic <- cbind.data.frame(coef,std_error,test_stat,pval,conf_low,conf_high)
df_baby_gtsum <- df_baby_gtsum %>%
select(coef,pval) %>%
mutate(cell_type = c("Bcell","CD4T","CD8T","Gran","Mono","NK","nRBC")) %>%
mutate(exposure = c("general trauma"))
df_baby_setot <- df_baby_setot %>%
select(coef,pval) %>%
mutate(cell_type = c("Bcell","CD4T","CD8T","Gran","Mono","NK","nRBC")) %>%
mutate(exposure = c("sexual events"))
df_baby_awar_nr <- df_baby_awar_nr %>%
select(coef,pval) %>%
mutate(cell_type = c("Bcell","CD4T","CD8T","Gran","Mono","NK","nRBC")) %>%
mutate(exposure = c("war stress"))
df_baby_achronic <- df_baby_achronic %>%
select(coef,pval) %>%
mutate(cell_type = c("Bcell","CD4T","CD8T","Gran","Mono","NK","nRBC")) %>%
mutate(exposure = c("chronic stress"))
baby_combo_cell <- rbind(df_baby_gtsum,df_baby_setot,df_baby_awar_nr,df_baby_achronic)
# Get a matrix of the coefficient values
baby_mat_cell <- matrix(data = baby_combo_cell$coef, byrow = FALSE, ncol = 4,nrow = 7)
rownames(baby_mat_cell)<- c("Bcell","CD4T","CD8T","Gran","Mono","NK","nRBC")
colnames(baby_mat_cell) <- c("general trauma","sexual events",
"war trauma", "chronic stress")
# Get a matrix of the p values
baby_mat_cell_pval <- matrix(data = baby_combo_cell$pval, byrow = FALSE, ncol = 4,nrow = 7)
rownames(baby_mat_cell_pval)<- c("Bcell","CD4T","CD8T","Gran","Mono","NK","nRBC")
colnames(baby_mat_cell_pval) <- c("general trauma","sexual events",
"war trauma", "chronic stress")
col_fun2 = colorRamp2(c(-0.1,0, 0.1), c("blue","white", "red"))
baby_cell_heatmap <- Heatmap(baby_mat_cell,
name = "beta",
col = col_fun2,
column_title = "Maternal Stress - Results in Babies",
row_title = "Immune Cell types",
cluster_rows = FALSE,
cluster_columns = FALSE,
cell_fun = function(j, i, x, y, w, h, fill) {
if (baby_mat_cell_pval[i, j] < 0.001) {
grid.text("***", x, y)
} else if (baby_mat_cell_pval[i, j] < 0.01) {
grid.text("**", x, y)
} else if (baby_mat_cell_pval[i, j] < 0.05) {
grid.text("*", x, y)
}
},
row_gap = unit(0, "mm"), border = TRUE)
# Make a pvalue function for table1:
pvalue_table <- function(x, ...) {
x <- x[-length(x)] # Remove "overall" group
# Construct vectors of data y, and groups (strata) g
y <- unlist(x)
g <- factor(rep(1:length(x), times=sapply(x, length)))
if (is.numeric(y)) {
# For numeric variables, perform an ANOVA
p <- summary(aov(y ~ g))[[1]][["Pr(>F)"]][1]
} else {
# For categorical variables, perform a chi-squared test of independence
p <- fisher.test(table(y, g))$p.value
}
# Format the p-value, using an HTML entity for the less-than sign.
# The initial empty string places the output on the line below the variable label.
c("", sub("<", "<", format.pval(p, digits=3, eps=0.001)))
}
dfm$Delivery_Mode <- factor(dfm$pcsec, levels = c(0,1),
labels = c("vaginal","caesarean section"))
dfm$Alcohol <- factor(dfm$palco, levels = c(0,1),
labels = c("No","Yes"))
dfm$Parity <- factor(dfm$parous, levels = c(0,1),
labels = c("Multigravida","Primigravida"))
dfm$Gestational_Age <- dfm$ga_meth/7
dfm$General_Trauma <- dfm$gtsum
dfm$Sexual_Events <- dfm$setot
dfm$War_Stress <- dfm$awar_nr
dfm$Chronic_Stress <- dfm$achronic
dfm$Cohort <- factor(dfm$cohort, levels = c("C","SV"),
labels = c("General Maternity Ward",
"Sexual Violence Ward"))
table1(~ age + bmi + Delivery_Mode + Alcohol + Parity + Gestational_Age +
General_Trauma + Sexual_Events + War_Stress + Chronic_Stress
| Cohort, data=dfm, overall="Total",
extra.col=list(`p`=pvalue_table), extra.col.pos=3)
| General Maternity Ward (N=86) |
Sexual Violence Ward (N=65) |
p | Total (N=151) |
|
|---|---|---|---|---|
| age | ||||
| Mean (SD) | 26.2 (6.07) | 17.3 (3.99) | <0.001 | 22.5 (6.86) |
| Median [Min, Max] | 25.0 [14.0, 42.0] | 17.0 [12.0, 33.0] | 21.0 [12.0, 42.0] | |
| Missing | 0 (0%) | 2 (3.1%) | 2 (1.3%) | |
| bmi | ||||
| Mean (SD) | 28.2 (4.15) | 25.8 (2.78) | <0.001 | 27.2 (3.81) |
| Median [Min, Max] | 27.1 [21.4, 39.6] | 25.7 [21.0, 33.7] | 26.3 [21.0, 39.6] | |
| Missing | 0 (0%) | 1 (1.5%) | 1 (0.7%) | |
| Delivery_Mode | ||||
| vaginal | 70 (81.4%) | 48 (73.8%) | 0.321 | 118 (78.1%) |
| caesarean section | 16 (18.6%) | 17 (26.2%) | 33 (21.9%) | |
| Alcohol | ||||
| No | 71 (82.6%) | 56 (86.2%) | 0.353 | 127 (84.1%) |
| Yes | 15 (17.4%) | 7 (10.8%) | 22 (14.6%) | |
| Missing | 0 (0%) | 2 (3.1%) | 2 (1.3%) | |
| Parity | ||||
| Multigravida | 59 (68.6%) | 8 (12.3%) | <0.001 | 67 (44.4%) |
| Primigravida | 24 (27.9%) | 55 (84.6%) | 79 (52.3%) | |
| Missing | 3 (3.5%) | 2 (3.1%) | 5 (3.3%) | |
| Gestational_Age | ||||
| Mean (SD) | 40.1 (1.02) | 39.5 (1.43) | 0.008 | 39.9 (1.24) |
| Median [Min, Max] | 40.2 [37.1, 42.6] | 39.8 [33.2, 41.5] | 40.1 [33.2, 42.6] | |
| Missing | 3 (3.5%) | 2 (3.1%) | 5 (3.3%) | |
| General_Trauma | ||||
| Mean (SD) | 2.45 (1.84) | 2.22 (1.45) | 0.391 | 2.35 (1.68) |
| Median [Min, Max] | 2.00 [0, 8.00] | 2.00 [0, 8.00] | 2.00 [0, 8.00] | |
| Sexual_Events | ||||
| Mean (SD) | 0.942 (1.63) | 1.72 (1.60) | 0.004 | 1.28 (1.66) |
| Median [Min, Max] | 0 [0, 6.00] | 1.00 [0, 6.00] | 1.00 [0, 6.00] | |
| War_Stress | ||||
| Mean (SD) | 1.05 (0.893) | 0.954 (0.959) | 0.542 | 1.01 (0.920) |
| Median [Min, Max] | 1.00 [0, 5.00] | 1.00 [0, 4.00] | 1.00 [0, 5.00] | |
| Chronic_Stress | ||||
| Mean (SD) | 4.49 (3.54) | 8.43 (2.87) | <0.001 | 6.19 (3.80) |
| Median [Min, Max] | 3.00 [0, 14.0] | 9.00 [0, 14.0] | 6.00 [0, 14.0] |
Make a descriptive statistics table that is inclusive of the participants included in the epigenetic age analyses.
mom_mage$Delivery_Mode <- factor(mom_mage$pcsec, levels = c(0,1),
labels = c("vaginal","caesarean section"))
mom_mage$Alcohol <- factor(mom_mage$palco, levels = c(0,1),
labels = c("No","Yes"))
mom_mage$Parity <- factor(mom_mage$parous, levels = c(0,1),
labels = c("Multigravida","Primigravida"))
mom_mage$Gestational_Age <- mom_mage$ga_meth/7
mom_mage$General_Trauma <- mom_mage$gtsum
mom_mage$Sexual_Events <- mom_mage$setot
mom_mage$War_Stress <- mom_mage$awar_nr
mom_mage$Chronic_Stress <- mom_mage$achronic
mom_mage$Cohort <- factor(mom_mage$cohort, levels = c("C","SV"),
labels = c("General Maternity Ward",
"Sexual Violence Ward"))
table1(~ age + bmi + Delivery_Mode + Alcohol + Parity + Gestational_Age +
General_Trauma + Sexual_Events + War_Stress + Chronic_Stress
| Cohort, data=mom_mage, overall="Total",
extra.col=list(`p`=pvalue_table), extra.col.pos=3)
| General Maternity Ward (N=90) |
Sexual Violence Ward (N=65) |
p | Total (N=155) |
|
|---|---|---|---|---|
| age | ||||
| Mean (SD) | 26.3 (5.95) | 17.3 (3.99) | <0.001 | 22.6 (6.83) |
| Median [Min, Max] | 26.0 [14.0, 42.0] | 17.0 [12.0, 33.0] | 21.0 [12.0, 42.0] | |
| Missing | 0 (0%) | 2 (3.1%) | 2 (1.3%) | |
| bmi | ||||
| Mean (SD) | 28.1 (4.08) | 25.8 (2.78) | <0.001 | 27.2 (3.76) |
| Median [Min, Max] | 27.0 [21.4, 39.6] | 25.7 [21.0, 33.7] | 26.3 [21.0, 39.6] | |
| Missing | 0 (0%) | 1 (1.5%) | 1 (0.6%) | |
| Delivery_Mode | ||||
| vaginal | 73 (81.1%) | 48 (73.8%) | 0.327 | 121 (78.1%) |
| caesarean section | 17 (18.9%) | 17 (26.2%) | 34 (21.9%) | |
| Alcohol | ||||
| No | 75 (83.3%) | 56 (86.2%) | 0.362 | 131 (84.5%) |
| Yes | 15 (16.7%) | 7 (10.8%) | 22 (14.2%) | |
| Missing | 0 (0%) | 2 (3.1%) | 2 (1.3%) | |
| Parity | ||||
| Multigravida | 63 (70.0%) | 8 (12.3%) | <0.001 | 71 (45.8%) |
| Primigravida | 24 (26.7%) | 55 (84.6%) | 79 (51.0%) | |
| Missing | 3 (3.3%) | 2 (3.1%) | 5 (3.2%) | |
| Gestational_Age | ||||
| Mean (SD) | 40.1 (1.00) | 39.5 (1.43) | 0.006 | 39.9 (1.23) |
| Median [Min, Max] | 40.3 [37.1, 42.6] | 39.8 [33.2, 41.5] | 40.1 [33.2, 42.6] | |
| Missing | 4 (4.4%) | 2 (3.1%) | 6 (3.9%) | |
| General_Trauma | ||||
| Mean (SD) | 2.46 (1.88) | 2.22 (1.45) | 0.391 | 2.35 (1.71) |
| Median [Min, Max] | 2.00 [0, 8.00] | 2.00 [0, 8.00] | 2.00 [0, 8.00] | |
| Sexual_Events | ||||
| Mean (SD) | 0.956 (1.63) | 1.72 (1.60) | 0.004 | 1.28 (1.66) |
| Median [Min, Max] | 0 [0, 6.00] | 1.00 [0, 6.00] | 1.00 [0, 6.00] | |
| War_Stress | ||||
| Mean (SD) | 1.04 (0.886) | 0.954 (0.959) | 0.545 | 1.01 (0.915) |
| Median [Min, Max] | 1.00 [0, 5.00] | 1.00 [0, 4.00] | 1.00 [0, 5.00] | |
| Chronic_Stress | ||||
| Mean (SD) | 4.63 (3.60) | 8.43 (2.87) | <0.001 | 6.23 (3.80) |
| Median [Min, Max] | 3.00 [0, 14.0] | 9.00 [0, 14.0] | 6.00 [0, 14.0] |
dfb$Delivery_Mode <- factor(dfb$pcsec, levels = c(0,1),
labels = c("vaginal","caesarean section"))
dfb$Alcohol <- factor(dfb$palco, levels = c(0,1),
labels = c("No","Yes"))
dfb$Parity <- factor(dfb$parous, levels = c(0,1),
labels = c("Multigravida","Primigravida"))
dfb$Gestational_Age <- dfb$ga_meth/7
dfb$General_Trauma <- dfb$gtsum
dfb$Sexual_Events <- dfb$setot
dfb$War_Stress <- dfb$awar_nr
dfb$Chronic_Stress <- dfb$achronic
dfb$Cohort <- factor(dfb$cohort, levels = c("C","SV"),
labels = c("General Maternity Ward",
"Sexual Violence Ward"))
dfb$Sex <- factor(dfb$sex, levels = c("F","M"),
labels = c("Female","Male"))
table1(~ Sex + bmi + Delivery_Mode + Alcohol + Parity + Gestational_Age +
General_Trauma + Sexual_Events + War_Stress + Chronic_Stress
| Cohort, data=dfb, overall="Total",
extra.col=list(`p`=pvalue_table), extra.col.pos=3)
| General Maternity Ward (N=86) |
Sexual Violence Ward (N=65) |
p | Total (N=151) |
|
|---|---|---|---|---|
| Sex | ||||
| Female | 43 (50.0%) | 33 (50.8%) | 1 | 76 (50.3%) |
| Male | 43 (50.0%) | 32 (49.2%) | 75 (49.7%) | |
| bmi | ||||
| Mean (SD) | 28.2 (4.17) | 25.9 (2.80) | <0.001 | 27.2 (3.81) |
| Median [Min, Max] | 27.0 [21.4, 39.6] | 25.8 [21.0, 33.7] | 26.4 [21.0, 39.6] | |
| Missing | 0 (0%) | 1 (1.5%) | 1 (0.7%) | |
| Delivery_Mode | ||||
| vaginal | 69 (80.2%) | 46 (70.8%) | 0.184 | 115 (76.2%) |
| caesarean section | 17 (19.8%) | 19 (29.2%) | 36 (23.8%) | |
| Alcohol | ||||
| No | 71 (82.6%) | 55 (84.6%) | 0.355 | 126 (83.4%) |
| Yes | 15 (17.4%) | 7 (10.8%) | 22 (14.6%) | |
| Missing | 0 (0%) | 3 (4.6%) | 3 (2.0%) | |
| Parity | ||||
| Multigravida | 62 (72.1%) | 8 (12.3%) | <0.001 | 70 (46.4%) |
| Primigravida | 22 (25.6%) | 54 (83.1%) | 76 (50.3%) | |
| Missing | 2 (2.3%) | 3 (4.6%) | 5 (3.3%) | |
| Gestational_Age | ||||
| Mean (SD) | 40.1 (1.00) | 39.6 (1.44) | 0.011 | 39.9 (1.23) |
| Median [Min, Max] | 40.3 [37.1, 42.6] | 39.8 [33.2, 41.5] | 40.1 [33.2, 42.6] | |
| General_Trauma | ||||
| Mean (SD) | 2.41 (1.82) | 2.22 (1.46) | 0.488 | 2.32 (1.68) |
| Median [Min, Max] | 2.00 [0, 8.00] | 2.00 [0, 8.00] | 2.00 [0, 8.00] | |
| Sexual_Events | ||||
| Mean (SD) | 0.919 (1.65) | 1.69 (1.60) | 0.004 | 1.25 (1.67) |
| Median [Min, Max] | 0 [0, 6.00] | 1.00 [0, 6.00] | 1.00 [0, 6.00] | |
| War_Stress | ||||
| Mean (SD) | 1.06 (0.886) | 0.969 (0.968) | 0.558 | 1.02 (0.920) |
| Median [Min, Max] | 1.00 [0, 5.00] | 1.00 [0, 4.00] | 1.00 [0, 5.00] | |
| Chronic_Stress | ||||
| Mean (SD) | 4.51 (3.63) | 8.40 (3.10) | <0.001 | 6.19 (3.91) |
| Median [Min, Max] | 3.00 [0, 14.0] | 9.00 [0, 14.0] | 6.00 [0, 14.0] |
Make a descriptive statistics table that is inclusive of the newborns included in the epigenetic age analyses.
baby_mage$Delivery_Mode <- factor(baby_mage$pcsec, levels = c(0,1),
labels = c("vaginal","caesarean section"))
baby_mage$Alcohol <- factor(baby_mage$palco, levels = c(0,1),
labels = c("No","Yes"))
baby_mage$Parity <- factor(baby_mage$parous, levels = c(0,1),
labels = c("Multigravida","Primigravida"))
baby_mage$Gestational_Age <- baby_mage$ga_meth/7
baby_mage$General_Trauma <- baby_mage$gtsum
baby_mage$Sexual_Events <- baby_mage$setot
baby_mage$War_Stress <- baby_mage$awar_nr
baby_mage$Chronic_Stress <- baby_mage$achronic
baby_mage$Cohort <- factor(baby_mage$cohort, levels = c("C","SV"),
labels = c("General Maternity Ward",
"Sexual Violence Ward"))
baby_mage$Sex <- factor(baby_mage$sex, levels = c("F","M"),
labels = c("Female","Male"))
table1(~ Sex + age + bmi + Delivery_Mode + Alcohol + Parity + Gestational_Age +
General_Trauma + Sexual_Events + War_Stress + Chronic_Stress
| Cohort, data=baby_mage, overall="Total",
extra.col=list(`p`=pvalue_table), extra.col.pos=3)
| General Maternity Ward (N=90) |
Sexual Violence Ward (N=65) |
p | Total (N=155) |
|
|---|---|---|---|---|
| Sex | ||||
| Female | 43 (47.8%) | 33 (50.8%) | 0.747 | 76 (49.0%) |
| Male | 47 (52.2%) | 32 (49.2%) | 79 (51.0%) | |
| age | ||||
| Mean (SD) | 26.2 (5.92) | 17.5 (3.97) | <0.001 | 22.6 (6.75) |
| Median [Min, Max] | 26.0 [14.0, 42.0] | 17.0 [13.0, 33.0] | 21.0 [13.0, 42.0] | |
| Missing | 0 (0%) | 3 (4.6%) | 3 (1.9%) | |
| bmi | ||||
| Mean (SD) | 28.1 (4.09) | 25.9 (2.80) | <0.001 | 27.2 (3.76) |
| Median [Min, Max] | 27.0 [21.4, 39.6] | 25.8 [21.0, 33.7] | 26.4 [21.0, 39.6] | |
| Missing | 0 (0%) | 1 (1.5%) | 1 (0.6%) | |
| Delivery_Mode | ||||
| vaginal | 73 (81.1%) | 46 (70.8%) | 0.177 | 119 (76.8%) |
| caesarean section | 17 (18.9%) | 19 (29.2%) | 36 (23.2%) | |
| Alcohol | ||||
| No | 75 (83.3%) | 55 (84.6%) | 0.483 | 130 (83.9%) |
| Yes | 15 (16.7%) | 7 (10.8%) | 22 (14.2%) | |
| Missing | 0 (0%) | 3 (4.6%) | 3 (1.9%) | |
| Parity | ||||
| Multigravida | 63 (70.0%) | 8 (12.3%) | <0.001 | 71 (45.8%) |
| Primigravida | 24 (26.7%) | 54 (83.1%) | 78 (50.3%) | |
| Missing | 3 (3.3%) | 3 (4.6%) | 6 (3.9%) | |
| Gestational_Age | ||||
| Mean (SD) | 40.1 (1.00) | 39.6 (1.44) | 0.011 | 39.9 (1.23) |
| Median [Min, Max] | 40.3 [37.1, 42.6] | 39.8 [33.2, 41.5] | 40.1 [33.2, 42.6] | |
| Missing | 4 (4.4%) | 0 (0%) | 4 (2.6%) | |
| General_Trauma | ||||
| Mean (SD) | 2.42 (1.90) | 2.22 (1.46) | 0.464 | 2.34 (1.73) |
| Median [Min, Max] | 2.00 [0, 8.00] | 2.00 [0, 8.00] | 2.00 [0, 8.00] | |
| Sexual_Events | ||||
| Mean (SD) | 0.911 (1.63) | 1.69 (1.60) | 0.003 | 1.24 (1.66) |
| Median [Min, Max] | 0 [0, 6.00] | 1.00 [0, 6.00] | 1.00 [0, 6.00] | |
| War_Stress | ||||
| Mean (SD) | 1.04 (0.886) | 0.969 (0.968) | 0.617 | 1.01 (0.919) |
| Median [Min, Max] | 1.00 [0, 5.00] | 1.00 [0, 4.00] | 1.00 [0, 5.00] | |
| Chronic_Stress | ||||
| Mean (SD) | 4.57 (3.58) | 8.40 (3.10) | <0.001 | 6.17 (3.87) |
| Median [Min, Max] | 3.00 [0, 14.0] | 9.00 [0, 14.0] | 6.00 [0, 14.0] |
mom_sig_gtsum <- mom_gtsum_man2 %>%
filter(pval < (0.05/nrow(mom_gtsum_man2))) %>%
select(probe,coef,pval) %>%
mutate(exposure = c("general_trauma")) %>%
mutate(generation = c("mother"))
mom_sig_setot <- mom_setot_man2 %>%
filter(pval < (0.05/nrow(mom_setot_man2))) %>%
select(probe,coef,pval) %>%
mutate(exposure = c("sexual_events"))%>%
mutate(generation = c("mother"))
mom_sig_awar_nr <- mom_awar_nr_man2 %>%
filter(pval < (0.05/nrow(mom_awar_nr_man2))) %>%
select(probe,coef,pval) %>%
mutate(exposure = c("war_stress"))%>%
mutate(generation = c("mother"))
mom_sig_achronic <- mom_achronic_man2 %>%
filter(pval < (0.05/nrow(mom_achronic_man2))) %>%
select(probe,coef,pval) %>%
mutate(exposure = c("chronic_stress"))%>%
mutate(generation = c("mother"))
baby_sig_gtsum <- baby_gtsum_man2 %>%
filter(pval < (0.05/nrow(baby_gtsum_man2))) %>%
select(probe,coef,pval) %>%
mutate(exposure = c("general_trauma")) %>%
mutate(generation = c("baby"))
baby_sig_setot <- baby_setot_man2 %>%
filter(pval < (0.05/nrow(baby_setot_man2))) %>%
select(probe,coef,pval) %>%
mutate(exposure = c("sexual_events"))%>%
mutate(generation = c("baby"))
baby_sig_awar_nr <- baby_awar_nr_man2 %>%
filter(pval < (0.05/nrow(baby_awar_nr_man2))) %>%
select(probe,coef,pval) %>%
mutate(exposure = c("war_stress"))%>%
mutate(generation = c("baby"))
baby_sig_achronic <- baby_achronic_man2 %>%
filter(pval < (0.05/nrow(baby_achronic_man2))) %>%
select(probe,coef,pval) %>%
mutate(exposure = c("chronic_stress")) %>%
mutate(generation = c("baby"))
mom_all_sig <- bind_rows(mom_sig_gtsum,
mom_sig_setot,
mom_sig_awar_nr,
mom_sig_achronic)
baby_all_sig <- bind_rows(baby_sig_gtsum,
baby_sig_setot,
baby_sig_awar_nr,
baby_sig_achronic)
library(scales)
mom_all_sig2 <- zhou %>%
select(probeID,gene) %>%
right_join(mom_all_sig, by = c("probeID" = "probe")) %>%
mutate(coefficient = round(coef, digits = 3)) %>%
mutate(p_value = scientific(pval, digits = 3)) %>%
left_join(zhou2, by = c("probeID")) %>%
left_join(illumina2, by = c("probeID")) %>%
select(probeID,coefficient,p_value,exposure,
generation,gene,CGIposition,distToTSS,gene_context)
baby_all_sig2 <- zhou %>%
select(probeID,gene) %>%
right_join(baby_all_sig, by = c("probeID" = "probe")) %>%
mutate(coefficient = round(coef, digits = 3)) %>%
mutate(p_value = scientific(pval, digits = 3)) %>%
left_join(zhou2, by = c("probeID")) %>%
left_join(illumina2, by = c("probeID")) %>%
select(probeID,coefficient,p_value,exposure,
generation,gene,CGIposition,distToTSS,gene_context)
datatable(mom_all_sig2[c("exposure","probeID","coefficient","p_value","gene","CGIposition",
"gene_context")],
filter = "top", rownames = FALSE, width = '100%',
options = list(scrollX = TRUE),
caption = htmltools::tags$caption(style = 'caption-side: top; text-align: center; color:black; font-size:200% ;','All significant hits in mothers across four maternal stress EWAS'))
# Table of significant hits for mothers
mom_all_sig2 %>%
select(exposure,probeID,coefficient,p_value,gene,CGIposition,gene_context) %>%
arrange(exposure) %>%
kbl() %>%
kable_styling(full_width = FALSE) %>%
kable_material() %>%
pack_rows(group_label = "General Trauma",1,4) %>%
pack_rows(group_label = "Sexual Events",5,13) %>%
pack_rows(group_label = "War Stress",14,15)
| exposure | probeID | coefficient | p_value | gene | CGIposition | gene_context |
|---|---|---|---|---|---|---|
| General Trauma | ||||||
| general_trauma | cg11408019 | 0.003 | 1.10e-10 | NA | Island | |
| general_trauma | cg14519777 | 0.006 | 6.87e-08 | CTA-339C12.1;CUX1 | NA | Body;Body;Body |
| general_trauma | cg14282695 | 0.009 | 1.48e-08 | SAMD4A | NA | Body;Body |
| general_trauma | cg16543391 | -0.001 | 6.25e-08 | EML2;MIR330 | N_Shelf | TSS1500;TSS200;TSS200;Body;Body |
| Sexual Events | ||||||
| sexual_events | cg21219607 | -0.002 | 5.50e-08 | PARP15 | NA | TSS1500;TSS1500;Body;Body |
| sexual_events | cg06308131 | -0.004 | 9.71e-10 | MUC4 | S_Shore | Body;Body;Body |
| sexual_events | cg04358942 | -0.001 | 2.86e-08 | RP11-64D24.2 | NA | |
| sexual_events | cg23527517 | -0.008 | 1.23e-08 | OTOP3 | NA | Body |
| sexual_events | cg14859642 | 0.007 | 6.99e-08 | NA | NA | |
| sexual_events | cg00489624 | -0.003 | 1.10e-08 | DLGAP4 | NA | 5’UTR |
| sexual_events | cg24308336 | -0.007 | 3.79e-08 | ARHGAP40 | N_Shelf | Body |
| sexual_events | cg16765764 | -0.002 | 9.46e-09 | DHX35 | NA | Body;Body;Body |
| sexual_events | cg10897169 | -0.003 | 6.81e-08 | BCAS4 | NA | Body;Body;Body |
| War Stress | ||||||
| war_stress | cg13740840 | -0.002 | 5.99e-08 | KIF15;MIR564;TMEM42 | Island | TSS1500;TSS200 |
| war_stress | cg26486174 | 0.013 | 1.61e-08 | NA | Island | |
datatable(baby_all_sig2[c("exposure",
"probeID",
"coefficient",
"p_value",
"gene",
"CGIposition",
"gene_context")],
filter = "top", rownames = FALSE, width = '100%',
options = list(scrollX = TRUE),
caption = htmltools::tags$caption(style = 'caption-side: top; text-align: center; color:black; font-size:200% ;','All significant hits in babies across four maternal stress EWAS'))
# Table of significant hits for mothers
baby_all_sig2 %>%
select(exposure,probeID,coefficient,p_value,gene,CGIposition,gene_context) %>%
arrange(exposure) %>%
kbl() %>%
kable_styling(full_width = FALSE) %>%
kable_material() %>%
pack_rows(group_label = "General Trauma",1,2) %>%
pack_rows(group_label = "Sexual Events",3,8) %>%
pack_rows(group_label = "War Stress",9,11)
| exposure | probeID | coefficient | p_value | gene | CGIposition | gene_context |
|---|---|---|---|---|---|---|
| General Trauma | ||||||
| general_trauma | cg24590750 | 0.000 | 4.25e-08 | TBPL1 | Island | TSS1500;TSS200 |
| general_trauma | cg10783680 | 0.000 | 6.23e-08 | EXOC7 | Island | 5’UTR;1stExon;5’UTR;1stExon;5’UTR;1stExon;5’UTR;1stExon;1stExon;5’UTR;TSS200 |
| Sexual Events | ||||||
| sexual_events | cg10338475 | 0.006 | 4.74e-11 | CTC-436P18.1;SMIM15 | S_Shore | TSS1500;Body |
| sexual_events | cg11386818 | 0.002 | 3.97e-09 | SYNCRIP | Island | TSS1500;5’UTR;5’UTR;5’UTR;TSS1500;5’UTR |
| sexual_events | cg02176407 | 0.003 | 5.69e-08 | POU3F2 | Island | 1stExon |
| sexual_events | cg20807701 | 0.003 | 2.80e-11 | DNAJB9;PNPLA8;THAP5 | N_Shore | Body;TSS1500;5’UTR;1stExon |
| sexual_events | cg09631059 | -0.005 | 8.48e-09 | RP11-1391J7.1;TSPAN4 | N_Shore | 5’UTR;Body;Body;Body;Body;Body;Body |
| sexual_events | cg06873316 | 0.001 | 4.03e-08 | NELL1 | Island | TSS1500;TSS1500 |
| War Stress | ||||||
| war_stress | cg08985979 | 0.006 | 2.10e-08 | AC108142.1 | S_Shore | Body |
| war_stress | cg21172322 | 0.018 | 5.88e-10 | BCAT1;RP11-662I13.3 | N_Shore | Body |
| war_stress | cg00741900 | 0.007 | 2.98e-08 | DIO3;DIO3OS;MIR1247 | Island | 5’UTR;1stExon;TSS1500 |
library(patchwork)
# Print all manhattan plots
mom_gtsum_man + mom_setot_man + mom_awar_nr_man +
mom_achronic_man + baby_gtsum_man + baby_setot_man + baby_awar_nr_man +
baby_achronic_man + plot_layout(ncol = 2) + plot_annotation(tag_levels = "A")
within_stress_venn
between_stress_venn
ggarrange(mom_gtsum_pred_plots$cg11408019,
mom_gtsum_pred_plots$cg14519777,
mom_gtsum_pred_plots$cg14282695,
mom_gtsum_pred_plots$cg16543391,
ncol = 2, nrow = 2)
ggarrange(mom_setot_pred_plots$cg21219607,
mom_setot_pred_plots$cg06308131,
mom_setot_pred_plots$cg04358942,
mom_setot_pred_plots$cg23527517,
mom_setot_pred_plots$cg14859642,
mom_setot_pred_plots$cg00489624,
mom_setot_pred_plots$cg24308336,
mom_setot_pred_plots$cg16765764,
mom_setot_pred_plots$cg10897169,
ncol = 2, nrow = 5)
ggarrange(mom_awar_nr_pred_plots$cg13740840,
mom_awar_nr_pred_plots$cg26486174,
ncol = 2, nrow = 1)
Covariates controlled for in all analyses of mothers include: Body mass index, delivery mode (vaginal vs c-section), parity (yes vs no), alcohol use in pregnancy, age of the mother, gestational age of the baby, first principal component of cell type (which explains 89% of the variance in cell type for mothers), and cohort (c100 vs sv).
Covariates controlled for in all analyses of babies include: Maternal BMI, delivery mode (vaginal vs c-section), parity (yes vs no), maternal age, gestational age, infant sex, cohort (c100 or sv), and the first two principal components of cell type in babies, which explain 90% of the variance in cell type.
ggarrange(baby_gtsum_pred_plots$cg24590750,
baby_gtsum_pred_plots$cg10783680,
ncol = 2, nrow = 1)
ggarrange(baby_setot_pred_plots$cg10338475,
baby_setot_pred_plots$cg11386818,
baby_setot_pred_plots$cg02176407,
baby_setot_pred_plots$cg20807701,
baby_setot_pred_plots$cg09631059,
baby_setot_pred_plots$cg06873316,
ncol = 2, nrow = 3)
ggarrange(baby_awar_nr_pred_plots$cg08985979,
baby_awar_nr_pred_plots$cg21172322,
baby_awar_nr_pred_plots$cg00741900,
ncol = 2, nrow = 2)
Methylation and birthweight
meth_bwgt_model <- lm(bwgt ~ age + bmi + parous + pcsec +
palco + cohort + PC1_cells + PC2_cells + sex + ga_meth +
cg08985979,
data = dfb_sig_awar_nr)
nrow(model.frame(meth_bwgt_model))
## [1] 144
tidy(meth_bwgt_model)
## # A tibble: 12 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -3106. 1400. -2.22 0.0282
## 2 age 18.2 8.13 2.23 0.0273
## 3 bmi 26.7 10.0 2.67 0.00850
## 4 parous 69.7 107. 0.654 0.514
## 5 pcsec 157. 87.1 1.80 0.0746
## 6 palco -89.7 102. -0.876 0.383
## 7 cohortSV -86.7 97.6 -0.888 0.376
## 8 PC1_cells -167. 260. -0.643 0.521
## 9 PC2_cells -375. 379. -0.990 0.324
## 10 sexM 185. 71.3 2.59 0.0108
## 11 ga_meth 19.7 4.96 3.98 0.000113
## 12 cg08985979 -4770. 2239. -2.13 0.0350
cg08985979_pred <- prediction(lm(bwgt ~ age + bmi + parous + pcsec +
palco + cohort + PC1_cells + PC2_cells + sex + ga_meth +
cg08985979,
data = dfb_sig_awar_nr))
# Make another version:
label1 <- expression(bold("b = -4770.2, p = 0.03"))
cg08985979_pred_2 <-
ggplot(cg08985979_pred, aes(x = cg08985979, y = fitted)) +
geom_point(shape = 20, alpha = 2/3) +
geom_smooth(method = "lm", se = TRUE, color = "red", fill = "salmon") +
theme_bw() +
ylab("Adj. birthweight\n (grams)") +
xlab("Methylation at cg08985979") +
# ggtitle("Birthweight is associated with\n methylation at cg08985979 in newborns") +
# annotate("text",x = 0.165, y = 3550,
# label = label1,
# hjust = 0,
# fontface = 2,
# size = 4) +
theme(plot.title = element_text(hjust = 0.5)) +
theme(text=element_text(size=6.67))
cg08985979_pred_2
War Stress had a significant negative association with birthweight (beta = -125.98798, p = 0.000695) in babies. Update this beta and p in text.
model_awar_nr <- lm(bwgt ~ bmi + age + pcsec + palco +
parous + cohort + ga_meth + sex + awar_nr, data = dfb)
nrow(model.frame(model_awar_nr))
## [1] 144
tidy(model_awar_nr) # significant negative association.
## # A tibble: 10 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -2948. 1281. -2.30 0.0229
## 2 bmi 23.5 9.64 2.44 0.0162
## 3 age 18.8 7.87 2.38 0.0185
## 4 pcsec 227. 82.8 2.74 0.00702
## 5 palco -110. 99.0 -1.11 0.270
## 6 parous 68.0 101. 0.670 0.504
## 7 cohortSV -111. 92.2 -1.21 0.229
## 8 ga_meth 17.8 4.58 3.89 0.000157
## 9 sexM 197. 66.9 2.95 0.00375
## 10 awar_nr -126. 36.3 -3.47 0.000695
# Plot the raw data
ggplot(dfb, aes(x = awar_nr, y = bwgt, color = cohort)) +
geom_point(alpha = 0.75) +
geom_smooth(method = "lm", se = FALSE) +
theme_pubclean() +
ggtitle("No significant interaction between cohort and stress")
awar_nr_bwgt_pred <- prediction(lm(bwgt ~ bmi + age + pcsec +
palco + parous + cohort +
ga_meth + sex + awar_nr, data = dfb))
set.seed(456)
awar_nr_bwgt_pred_plot <-
ggplot(awar_nr_bwgt_pred, aes(x = awar_nr, y = fitted)) +
geom_jitter(shape = 20, alpha = 2/3, width = 0.15) +
geom_smooth(method = "lm", se = TRUE, color = "red", fill = "salmon") +
theme_bw() +
ylab("Adj. birthweight\n (grams)") +
xlab("War Trauma") +
# ggtitle("Birthweight is associated with\n methylation at cg08985979 in newborns") +
# annotate("text",x = 0.165, y = 3550,
# label = label1,
# hjust = 0,
# fontface = 2,
# size = 4) +
theme(plot.title = element_text(hjust = 0.5)) +
theme(text=element_text(size=6.67))
awar_nr_bwgt_pred_plot
model_stress_meth <- rlm(cg08985979 ~ bmi + age + pcsec + palco +
parous + cohort + ga_meth + sex + PC1_cells +
PC2_cells + awar_nr, data = dfb_sig_awar_nr)
mod_sum_robust(model_stress_meth)
## # A tibble: 12 × 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 0.108 0.0465 2.32 0.0205 0.0166 0.199
## 2 bmi 0.000224 0.000365 0.615 0.539 -0.000490 0.000939
## 3 age 0.0000708 0.000275 0.257 0.797 -0.000469 0.000611
## 4 pcsec -0.00851 0.00280 -3.04 0.00236 -0.0140 -0.00303
## 5 palco 0.00445 0.00331 1.35 0.179 -0.00203 0.0109
## 6 parous 0.00536 0.00373 1.44 0.151 -0.00195 0.0127
## 7 cohortSV 0.000359 0.00398 0.0903 0.928 -0.00744 0.00816
## 8 ga_meth -0.0000111 0.000174 -0.0634 0.949 -0.000353 0.000331
## 9 sexM -0.000581 0.00250 -0.232 0.816 -0.00548 0.00432
## 10 PC1_cells -0.0179 0.00681 -2.63 0.00848 -0.0313 -0.00458
## 11 PC2_cells -0.0595 0.0100 -5.93 0.00000000302 -0.0792 -0.0399
## 12 awar_nr 0.00598 0.00107 5.60 0.0000000210 0.00389 0.00807
nrow(model.frame(model_stress_meth))
## [1] 145
# Plot the raw data
ggplot(dfb_sig_awar_nr, aes(x = awar_nr, y = cg08985979, color = cohort)) +
geom_point(alpha = 0.75) +
geom_smooth(method = "lm", se = FALSE) +
theme_pubclean() +
ggtitle("No significant interaction between cohort and stress")
stress_meth_pred <- prediction(rlm(cg08985979 ~ bmi + age + pcsec + palco +
parous + cohort + ga_meth + sex + PC1_cells +
PC2_cells + awar_nr, data = dfb_sig_awar_nr))
set.seed(456)
stress_meth_pred_plot <-
ggplot(stress_meth_pred, aes(x = awar_nr, y = fitted)) +
geom_jitter(shape = 20, alpha = 2/3, width = 0.15) +
geom_smooth(method = "lm", se = TRUE, color = "red", fill = "salmon") +
theme_bw() +
ylab("Adj. DNAm at\n cg08985979") +
xlab("War Trauma") +
# ggtitle("Birthweight is associated with\n methylation at cg08985979 in newborns") +
# annotate("text",x = 0.165, y = 3550,
# label = label1,
# hjust = 0,
# fontface = 2,
# size = 4) +
theme(plot.title = element_text(hjust = 0.5)) +
theme(text=element_text(size=6.67))
stress_meth_pred_plot
A single hit in mothers across 40 tests. General trauma associated with methylation PC6 (p=0.04).
ALL NEGATIVE RESULTS.
ALL NEGATIVE RESULTS.
mom_cell_heatmap
Bcell = B cell. CD4T = CD4 positive T cell. CD8T = CD8 positive T cell. Mono = Monocyte. Neu = Neutrophil. NK = Natural Killer cell.
baby_cell_heatmap
Bcell = B cell. CD4T = CD4 positive T cell. CD8T = CD8 positive T cell. Gran = Granulocyte. Mono = Monocyte. NK = Natural Killer cell. nRBC = nucleated Red Blood cell.
mom_eaa_heatmap
This heatmap depicts relationships between epigenetic age acceleration and maternal stress. The different epigenetic clocks listed on the y axis are: Horvath = Horvath’s original clock. IEAA = Intrinsic Epigenetic Age Acceleration. Essentially Horvath’s clock with a cell type correction. EEAA = Extrinsic Epigenetic Age Acceleration. This measure tries to take into account age-associated changes in immune cell type and capitalize on them to make a better measure of age acceleration. This measure depends on cell type by design. Telomere = An epigenetic marker of telomere length. PhenoAge = Predictive of mortality. GrimAge = Also predictive of mortality.
baby_eaa_heatmap
epiage <- mom_eaa_heatmap %v% baby_eaa_heatmap
draw(epiage)
mom_setot_pheno_age_prediction <- prediction(lm(AgeAccelPheno ~ bmi +
pcsec +
palco +
parous +
cohort +
setot, data = mom_mage))
set.seed(456)
mom_setot_pheno_age_plot <-
ggplot(mom_setot_pheno_age_prediction, aes(x = setot, y = fitted,
fill = fitted,
color = fitted)) +
geom_jitter(shape = 21, width = 0.25, height = 0.05,
color = "black",
size = 2.5, alpha = 0.75) +
scale_fill_viridis_c(direction = -1,
option = "magma",
name = "PhenoAge\nAcceleration") +
scale_color_viridis_c(direction = -1,
option = "magma",
name = "PhenoAge\nAcceleration") +
scale_x_continuous(breaks = seq(0,6,1), labels = seq(0,6,1)) +
theme_classic() +
geom_smooth(method = "lm", color = "grey", fill = "grey") +
xlab("Sexual Trauma in Mothers") +
ylab("Adj. PhenoAge Accel")
mom_setot_grim_age_prediction <- prediction(lm(AgeAccelGrim ~ bmi +
pcsec +
palco +
parous +
cohort +
setot, data = mom_mage))
set.seed(456)
mom_setot_grim_age_plot <-
ggplot(mom_setot_grim_age_prediction, aes(x = setot, y = fitted,
fill = fitted,
color = fitted)) +
geom_jitter(shape = 21, width = 0.25, height = 0.05,
color = "black",
size = 3, alpha = 0.75) +
scale_fill_viridis_c(direction = -1,
option = "magma",
name = "GrimAge\nAcceleration") +
scale_color_viridis_c(direction = -1,
option = "magma",
name = "GrimAge\nAcceleration") +
scale_x_continuous(breaks = seq(0,6,1), labels = seq(0,6,1)) +
theme_classic() +
geom_smooth(method = "lm", color = "grey", fill = "grey") +
xlab("Sexual Trauma in Mothers") +
ylab("Adj. GrimAge Accel")
mom_setot_EEAA_prediction <- prediction(lm(EEAA ~ bmi +
pcsec +
palco +
parous +
cohort +
setot, data = mom_mage))
set.seed(456)
mom_setot_eeaa_age_plot <-
ggplot(mom_setot_EEAA_prediction, aes(x = setot, y = fitted,
fill = fitted,
color = fitted)) +
geom_jitter(shape = 21, width = 0.25, height = 0.05,
color = "black",
size = 3, alpha = 0.75) +
scale_fill_viridis_c(direction = -1,
option = "magma",
name = "Extrinsic\nEpigenetic\nAge\nAcceleration") +
scale_color_viridis_c(direction = -1,
option = "magma",
name = "Extrinsic\nEpigenetic\nAge\nAcceleration") +
scale_x_continuous(breaks = seq(0,6,1), labels = seq(0,6,1)) +
theme_classic() +
geom_smooth(method = "lm", color = "grey", fill = "grey") +
xlab("Sexual Trauma in Mothers") +
ylab("Adj. Extrinsic Accel")
mom_setot_telomere_prediction <- prediction(lm(DNAmTLAdjAge ~ bmi +
pcsec +
palco +
parous +
cohort +
setot, data = mom_mage))
set.seed(456)
mom_setot_telomere_age_plot <-
ggplot(mom_setot_telomere_prediction, aes(x = setot, y = fitted,
fill = fitted,
color = fitted)) +
geom_jitter(shape = 21, width = 0.25, height = 0.05,
color = "black",
size = 3, alpha = 0.75) +
scale_fill_viridis_c(direction = 1,
option = "magma",
name = "Telomere\nLength") +
scale_color_viridis_c(direction = 1,
option = "magma",
name = "Telomere\nLength") +
scale_x_continuous(breaks = seq(0,6,1), labels = seq(0,6,1)) +
theme_classic() +
geom_smooth(method = "lm", color = "grey", fill = "grey") +
xlab("Sexual Trauma in Mothers") +
ylab("Adj. Telomere Length")
baby_gtsum_EEAA_plot <- prediction(lm(EEAA ~ bmi + pcsec + palco + parous + cohort +
sex + ga_meth + gtsum, data = baby_mage))
set.seed(456)
baby_gtsum_eeaa_plot <-
ggplot(baby_gtsum_EEAA_plot, aes(x = gtsum, y = fitted,
fill = fitted,
color = fitted)) +
geom_jitter(shape = 21, width = 0.25, height = 0.05,
color = "black",
size = 3, alpha = 0.75) +
scale_fill_viridis_c(direction = -1,
option = "magma",
name = "Extrinsic\nEpigenetic\nAge\nAcceleration") +
scale_color_viridis_c(direction = -1,
option = "magma",
name = "Extrinsic\nEpigenetic\nAge\nAcceleration") +
scale_x_continuous(breaks = seq(0,8,1), labels = seq(0,8,1)) +
theme_classic() +
geom_smooth(method = "lm", color = "grey", fill = "grey") +
xlab("General Trauma in Newborns") +
ylab("Adj. Extrinsic Accel")
baby_awar_nr_EEAA_plot <- prediction(lm(EEAA ~ bmi + pcsec + palco + parous + cohort +
sex + ga_meth + awar_nr, data = baby_mage))
set.seed(456)
baby_awar_nr_eeaa_plot <-
ggplot(baby_awar_nr_EEAA_plot, aes(x = awar_nr, y = fitted,
fill = fitted,
color = fitted)) +
geom_jitter(shape = 21, width = 0.25, height = 0.05,
color = "black",
size = 3, alpha = 0.75) +
scale_fill_viridis_c(direction = -1,
option = "magma",
name = "Extrinsic\nEpigenetic\nAge\nAcceleration") +
scale_color_viridis_c(direction = -1,
option = "magma",
name = "Extrinsic\nEpigenetic\nAge\nAcceleration") +
scale_x_continuous(breaks = seq(0,8,1), labels = seq(0,8,1)) +
theme_classic() +
geom_smooth(method = "lm", color = "grey", fill = "grey") +
xlab("War Trauma in Newborns") +
ylab("Adj. Extrinsic Accel")
addSmallLegend <- function(myPlot, pointSize = 0.5, textSize = 8, spaceLegend = 0.8) {
myPlot +
#guides(shape = guide_legend(override.aes = list(size = pointSize)),
# color = guide_legend(override.aes = list(size = pointSize))) +
theme(legend.title = element_text(size = textSize),
legend.text = element_text(size = textSize),
legend.key.size = unit(spaceLegend, "lines"))
}
library(patchwork)
addSmallLegend(mom_setot_eeaa_age_plot) +
addSmallLegend(mom_setot_telomere_age_plot) +
addSmallLegend(mom_setot_pheno_age_plot) +
addSmallLegend(mom_setot_grim_age_plot) +
addSmallLegend(baby_gtsum_eeaa_plot) +
addSmallLegend(baby_awar_nr_eeaa_plot) +
plot_layout(ncol = 2) +
plot_annotation(tag_levels = "A")
# Get epigenetic age acceleration model parameters
summary(lm(AgeAccelPheno ~ bmi + pcsec + palco + parous + cohort + setot, data = mom_mage))
##
## Call:
## lm(formula = AgeAccelPheno ~ bmi + pcsec + palco + parous + cohort +
## setot, data = mom_mage)
##
## Residuals:
## Min 1Q Median 3Q Max
## -33.692 -7.441 0.265 7.541 36.288
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.67928 8.00427 -0.959 0.33899
## bmi 0.01278 0.27573 0.046 0.96308
## pcsec -3.41020 2.35950 -1.445 0.15058
## palco -1.02164 2.83899 -0.360 0.71949
## parous 14.57064 2.43344 5.988 1.66e-08 ***
## cohortSV 7.79501 2.46472 3.163 0.00191 **
## setot 1.36186 0.60631 2.246 0.02624 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.56 on 142 degrees of freedom
## (6 observations deleted due to missingness)
## Multiple R-squared: 0.4841, Adjusted R-squared: 0.4623
## F-statistic: 22.21 on 6 and 142 DF, p-value: < 2.2e-16
summary(lm(AgeAccelGrim ~ bmi + pcsec + palco + parous + cohort + setot, data = mom_mage))
##
## Call:
## lm(formula = AgeAccelGrim ~ bmi + pcsec + palco + parous + cohort +
## setot, data = mom_mage)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.8957 -2.0845 0.2629 1.9500 7.5035
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.70218 2.26262 1.194 0.2344
## bmi -0.16631 0.07794 -2.134 0.0346 *
## pcsec -0.00725 0.66698 -0.011 0.9913
## palco -0.21906 0.80252 -0.273 0.7853
## parous 3.31421 0.68788 4.818 3.68e-06 ***
## cohortSV 1.03075 0.69672 1.479 0.1412
## setot 0.42460 0.17139 2.477 0.0144 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.266 on 142 degrees of freedom
## (6 observations deleted due to missingness)
## Multiple R-squared: 0.3933, Adjusted R-squared: 0.3677
## F-statistic: 15.35 on 6 and 142 DF, p-value: 1.646e-13
summary(lm(EEAA ~ bmi + pcsec + palco + parous + cohort + setot, data = mom_mage))
##
## Call:
## lm(formula = EEAA ~ bmi + pcsec + palco + parous + cohort + setot,
## data = mom_mage)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.6705 -4.0579 -0.2834 3.7767 25.2586
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.8741 4.8832 -0.998 0.319910
## bmi 0.0027 0.1682 0.016 0.987218
## pcsec -1.9847 1.4395 -1.379 0.170130
## palco 1.0050 1.7320 0.580 0.562647
## parous 7.5865 1.4846 5.110 1.02e-06 ***
## cohortSV 5.4545 1.5037 3.628 0.000398 ***
## setot 1.0865 0.3699 2.937 0.003864 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.049 on 142 degrees of freedom
## (6 observations deleted due to missingness)
## Multiple R-squared: 0.473, Adjusted R-squared: 0.4507
## F-statistic: 21.24 on 6 and 142 DF, p-value: < 2.2e-16
summary(lm(DNAmTLAdjAge ~ bmi + pcsec + palco + parous + cohort + setot, data = mom_mage))
##
## Call:
## lm(formula = DNAmTLAdjAge ~ bmi + pcsec + palco + parous + cohort +
## setot, data = mom_mage)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.51666 -0.10899 -0.00522 0.11513 0.52193
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.140668 0.123610 1.138 0.257034
## bmi -0.002119 0.004258 -0.498 0.619447
## pcsec 0.046587 0.036438 1.279 0.203142
## palco 0.029567 0.043842 0.674 0.501155
## parous -0.133149 0.037579 -3.543 0.000536 ***
## cohortSV -0.128444 0.038063 -3.375 0.000954 ***
## setot -0.021505 0.009363 -2.297 0.023093 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1784 on 142 degrees of freedom
## (6 observations deleted due to missingness)
## Multiple R-squared: 0.3535, Adjusted R-squared: 0.3262
## F-statistic: 12.94 on 6 and 142 DF, p-value: 1.226e-11
summary(lm(EEAA ~ bmi + pcsec + palco + parous + cohort +
sex + ga_meth + gtsum, data = baby_mage))
##
## Call:
## lm(formula = EEAA ~ bmi + pcsec + palco + parous + cohort + sex +
## ga_meth + gtsum, data = baby_mage)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.2612 -3.7631 0.8857 3.9407 15.3441
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -62.687959 19.630883 -3.193 0.00175 **
## bmi -0.004813 0.146732 -0.033 0.97388
## pcsec -1.213191 1.251190 -0.970 0.33395
## palco -0.649081 1.525693 -0.425 0.67119
## parous -1.164019 1.300273 -0.895 0.37226
## cohortSV 3.307231 1.330686 2.485 0.01415 *
## sexM -0.499230 1.030665 -0.484 0.62890
## ga_meth 0.207682 0.069349 2.995 0.00326 **
## gtsum 0.696777 0.305255 2.283 0.02401 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.082 on 136 degrees of freedom
## (10 observations deleted due to missingness)
## Multiple R-squared: 0.1244, Adjusted R-squared: 0.0729
## F-statistic: 2.415 on 8 and 136 DF, p-value: 0.01803
summary(lm(EEAA ~ bmi + pcsec + palco + parous + cohort +
sex + ga_meth + awar_nr, data = baby_mage))
##
## Call:
## lm(formula = EEAA ~ bmi + pcsec + palco + parous + cohort + sex +
## ga_meth + awar_nr, data = baby_mage)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.7296 -3.5601 0.6236 4.1669 14.4941
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -65.13881 19.85300 -3.281 0.00131 **
## bmi 0.02099 0.14821 0.142 0.88758
## pcsec -1.46966 1.26671 -1.160 0.24799
## palco -0.71590 1.53141 -0.467 0.64091
## parous -1.23036 1.30770 -0.941 0.34845
## cohortSV 3.45024 1.34259 2.570 0.01125 *
## sexM -0.45440 1.03451 -0.439 0.66118
## ga_meth 0.21576 0.06989 3.087 0.00245 **
## awar_nr 1.12135 0.56137 1.998 0.04776 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.109 on 136 degrees of freedom
## (10 observations deleted due to missingness)
## Multiple R-squared: 0.1168, Adjusted R-squared: 0.06482
## F-statistic: 2.248 on 8 and 136 DF, p-value: 0.02757
awar_nr_bwgt_pred_plot +
cg08985979_pred_2 +
stress_meth_pred_plot +
plot_layout(ncol = 1) +
plot_annotation(tag_levels = list(c("A","B","C")))
awar_nr_bwgt_pred_plot +
theme_bw(base_size = 9) +
theme(axis.title = element_text(face="bold")) +
theme(axis.text = element_text(face="bold")) +
cg08985979_pred_2 +
theme_bw(base_size = 9) +
theme(axis.title = element_text(face="bold")) +
theme(axis.text = element_text(face="bold")) +
stress_meth_pred_plot +
theme_bw(base_size = 9) +
theme(axis.title = element_text(face="bold")) +
theme(axis.text = element_text(face="bold")) +
plot_layout(nrow = 1) +
plot_annotation(tag_levels = list(c("A","B","C")))
# This text is copied from above:
####################################################################
# Step 1 probe attrition final counts:
# initial masking of bad probes, snp probes, and non cg probes:
# 127157
# Additional masking of probes failing in all samples by preprocessing:
# 1081
# Additional probes set to NA because of zeroIntensity, that were
# not already marked NA in all samples by initial masking or preprocessing:
# 873.
# Total probes masked for every single person at this stage = 129111.
# In preparation for ComBat, all probes failing in more than 10%
# of samples were removed, inclusive of all those above. That step
# takes us from 866553 to 706987. That's a difference of 159566.
# subtracting 129,111 from 159,566 gives us the number of probes
# were masked because they failed in too many samples (but less
# than failed in every single sample). 30,455.
####################################################################
# Mothers
mermaid("
graph TB
title[<u>Congo EWAS Probe Attrition in Mothers</u>]
title-->A
style title fill:#FFF,stroke:#FFF
linkStyle 0 stroke:#FFF,stroke-width:0;
A[Total probes on array n = 866,553] -->|remove non-CpG probes,off-target probes,and SNP sites, n = 127,157| B
B[n = 739,396] -->|remove probes that failed in preprocessing for all samples, n = 1,081| C
C[n = 738,315] -->|remove probes that had zero intensity in >10% of samples, n = 873| D
D[n = 737,442] --> |remove probes that failed in >10% of samples, n = 30,455|E
E[n = 706,987] --> |remove probes with Y chromosome annotation, n = 6|F
F[Final probe set n = 706,981]
"
)
# Babies
mermaid("
graph TB
title[<u>Congo EWAS Probe Attrition in Babies</u>]
title-->A
style title fill:#FFF,stroke:#FFF
linkStyle 0 stroke:#FFF,stroke-width:0;
A[Total probes on array n = 866,553] -->|remove non-CpG probes,off-target probes,and SNP sites, n = 127,157| B
B[n = 739,396] -->|remove probes that failed in preprocessing for all samples, n = 1,081| C
C[n = 738,315] -->|remove probes that had zero intensity in >10% of samples, n = 873| D
D[n = 737,442] --> |remove probes that failed in >10% of samples, n = 30,455|E
E[n = 706,987] --> |remove X and Y chromosome probes, n = 15,120|F
F[Final probe set n = 691,867]
"
)
mermaid("
graph TB
title[<u>Congo EWAS Sample Attrition </u>]
title-->A
style title fill:#FFF,stroke:#FFF
linkStyle 0 stroke:#FFF,stroke-width:0;
A[Total methylation files n = 496] -->|remove placenta samples, n = 24| B
B[n = 472] -->|remove cord blood samples, n = 4| C
C[n = 468] -->|remove twins and their mothers, n = 6| D
D[n = 462] --> |remove follow-up samples, n = 106|E
E[n = 356] --> |remove samples failing methylation qc and sex checks, n = 10|F
F[n = 346] --> |remove samples belonging to small batches, n = 23|G
G[n = 323] --> |remove samples that are technical replicates, n = 7|H
H[n = 316] --> |remove samples that are duplicate mothers or sibling samples, n = 14|I
I[Final data set n = 302. 151 mothers & 151 babies]
"
)
mermaid("
graph TB
title[<u>Congo Epigenetic Age Sample Attrition </u>]
title-->A
style title fill:#FFF,stroke:#FFF
linkStyle 0 stroke:#FFF,stroke-width:0;
A[Total methylation files n = 496] -->|remove placenta samples, n = 24| B
B[n = 472] -->|remove cord blood samples, n = 4| C
C[n = 468] -->|remove twins and their mothers, n = 6| D
D[n = 462] --> |remove follow-up samples, n = 106|E
E[n = 356] --> |average 41 replicate samples from 17 participants, n = 24|F
F[n = 332] --> |remove samples failing methylation qc and meffil sex checks, n = 8|G
G[n = 324] --> |remove duplicate mom and sibling samples, n = 14|H
H[Final data set n = 310. 155 mothers and 155 newborns]
"
)
#
# # Making a single table for mothers by cohort and overall is fine.
# stress <- read_csv(here("data","maternal_stress_measures_20220112.csv"))
#
# pheno_stress <- dfm %>%
# left_join(stress, by = c("idno" = "idno"))
#
# df3 <- df2 %>%
# left_join(pheno_stress, by = c("dyad" = "idno"))
df <- read.csv(here("data","survey_data_20220610.csv"), header=TRUE, stringsAsFactors = FALSE)
library(psych)
df$idno <- paste0(df$ID, df$no)
# Recode for unhappy marriage
df$happy_marriage <- ifelse(df$marr==0,NA,df$happy_marriage)
df$unhapmar <- ifelse(df$happy_marriage==1,0,1)
df$unhapmar[is.na(df$unhapmar)] <- 0
# Recode for general stress
df$genstress <- ifelse(df$majstress==1 | df$dailystress==1,1,0)
df$genstress[is.na(df$genstress)] <- 0
# Recode for victim of violence
df$vicviol <- ifelse(df$wvicc==1 | df$wvica==1,1,0)
# Reverse code variables in chronic stress scale
df$nohelp <- ifelse(df$phelp==1,0,1)
df$nochoice <- ifelse(df$choice_in_rep_decision==1,0,1)
df$foodinsuf <- ifelse(df$pfood==1,0,1)
df$notwantpreg <- ifelse(df$pwant==1,0,1)
df$noprenat <- ifelse(df$pprenatal_care==1,0,1)
df$trouble_pay_bills <- as.numeric(df$trouble_pay_bills)
df$notownhm <- ifelse(df$ownhm==1,0,1)
df$nowed <- ifelse(df$wedding==1,0,1)
df$no_loca_choice <- ifelse(df$choose_loca_birth==1,0,1)
df$ctalk <- ifelse(df$ctalk==1,0,1)
df$travalone <- ifelse(df$travel_with_someone_to_hospital==1,0,1)
# removed se4, preg_from_rape, raped_while_preg, your_birth_from_rape
# add in wrefug, wrefalone, and refug_bad since these are also war-related variables
awar_nr <- cbind.data.frame(df$wrefug,
df$wfkill,
df$wknap,
df$Battle.at.home,
df$current_refugee,
df$wrefalone)
awar_nr <- awar_nr %>% replace(is.na(.),0)
# alpha(awar_nr, check.keys = T) # 0.35
# Chronic stress
achronic <- cbind.data.frame(df$nohelp,
df$sickness,
df$cried,
df$ccry,
df$genstress,
df$notownhm,
df$nochoice,
df$foodinsuf,
df$notwantpreg,
df$noprenat,
df$pafraid,
df$trouble_pay_bills,
df$pemoa,
df$pphysa,
df$nowed,
df$unhapmar)
achronic <- achronic %>% replace(is.na(.), 0)
#alpha(achronic, check.keys = T) # 0.84
# Sexual events
var <- c("se1", "se2", "se3", "se4", "se5", "se6")
se <- df[,var]
se <- se %>% replace(is.na(.),0)
# alpha(se, check.keys = TRUE) # 0.79
# General trauma
# gt5 is negatively correlated so removed
var <- c("gt1", "gt2", "gt3", "gt4", "gt6", "gt7", "gt8", "gt9", "gt10", "gt11")
gt <- df[,var]
gt <- gt %>% replace(is.na(.),0)
# alpha(gt, check.keys = TRUE) # 0.64; gt5 is negatively correlated
# Subset for participants in our data and then make frequency tables
df2 <- df %>% replace(is.na(.),0)
df2 <- df2[which(df2$idno %in% mom_mage$dyad),]
df2$Cohort <- ifelse(df2$ID=="C","General Maternity Ward","Sexual Violence Ward")
# get war trauma frequencies for combined cohorts
df2$war_refugee <- factor(df2$wrefug, levels = c(0,1),
labels = c("No","Yes"))
label(df2$war_refugee) <- "War refugee"
df2$family_killed_war <- factor(df2$wfkill, levels = c(0,1),
labels = c("No","Yes"))
label(df2$family_killed_war) <- "Family member killed in war"
df2$kidnapped <- factor(df2$wknap, levels = c(0,1),
labels = c("No","Yes"))
label(df2$kidnapped) <- "Kidnapped as a result of the war"
df2$soldiers_torment_village <- factor(df2$Battle.at.home, levels = c(0,1),
labels = c("No","Yes"))
label(df2$soldiers_torment_village) <- "Soldiers tormented village"
df2$current_refugee <- factor(df2$current_refugee, levels = c(0,1),
labels = c("No","Yes"))
label(df2$current_refugee) <- "Current refugee"
df2$refugee_alone <- factor(df2$wrefalone, levels = c(0,1),
labels = c("No","Yes"))
label(df2$refugee_alone) <- "Was alone as a war refugee"
# get chronic stress frequencies for combined cohorts
df2$no_help_at_home <- factor(df2$nohelp, levels = c(0,1),
labels = c("No","Yes")) # no help at home
label(df2$no_help_at_home) <- "No help"
df2$sick_in_pregnancy <- factor(df2$sickness, levels = c(0,1),
labels = c("No","Yes")) # sick in pregnancy
label(df2$sick_in_pregnancy) <- "Sickness in pregnancy"
df2$cried_in_pregnancy <- factor(df2$cried, levels = c(0,1),
labels = c("No","Yes")) # did you cry in pregnancy
label(df2$cried_in_pregnancy) <- "Cried during pregnancy"
df2$ashmed_to_cry <- factor(df2$ccry, levels = c(0,1),
labels = c("No","Yes")) # did you cope by crying
label(df2$ashmed_to_cry) <- "Ashamed to cry"
df2$stress_in_pregnancy <- factor(df2$genstress, levels = c(0,1),
labels = c("No","Yes")) # stress during pregnancy
label(df2$stress_in_pregnancy) <- "Presence of general stressors"
df2$did_not_own_home <- factor(df2$notownhm, levels = c(0,1),
labels = c("No","Yes")) # did not own home
label(df2$did_not_own_home) <- "Did not own home"
df2$no_choice_reproduction <- factor(df2$nochoice, levels = c(0,1),
labels = c("No","Yes")) # no choice in reproductive decision
label(df2$no_choice_reproduction) <- "No choice in reproductive decision"
df2$insufficient_food <- factor(df2$foodinsuf, levels = c(0,1),
labels = c("No","Yes")) # insufficient food
label(df2$insufficient_food) <- "Did not have enough food during pregnancy"
df2$did_not_want_pregnancy <- factor(df2$notwantpreg, levels = c(0,1),
labels = c("No","Yes")) # did not want pregnancy
label(df2$did_not_want_pregnancy) <- "Did not want to get pregnant"
df2$no_prenatal_care <- factor(df2$noprenat, levels = c(0,1),
labels = c("No","Yes")) # no prenatal care
label(df2$no_prenatal_care) <- "No prenatal care"
df2$afraid_at_night <- factor(df2$pafraid, levels = c(0,1),
labels = c("No","Yes")) # afraid at night
label(df2$afraid_at_night) <- "Afraid at night"
df2$trouble_paying_bills <- factor(df2$trouble_pay_bills, levels = c(0,1),
labels = c("No","Yes")) # trouble paying bills
label(df2$trouble_paying_bills) <- "Trouble paying bills"
df2$emotionally_abused <- factor(df2$pemoa, levels = c(0,1),
labels = c("No","Yes")) # emotional abuse
label(df2$emotionally_abused) <- "Emotionally abused"
df2$physically_abused <- factor(df2$pphysa, levels = c(0,1),
labels = c("No","Yes")) # physical abuse
label(df2$physically_abused) <- "Physically abused"
df2$no_wedding <- factor(df2$nowed, levels = c(0,1),
labels = c("No","Yes")) # no wedding
label(df2$no_wedding) <- "Did not have a wedding"
df2$unhappy_marriage <- factor(df2$unhapmar, levels = c(0,1),
labels = c("No","Yes")) # unhappy marriage
label(df2$unhappy_marriage) <- "Unhappy marriage"
# get sexual events frequencies for both cohorts
df2$uncomfortable_touching <- factor(df2$se1, levels = c(0,1),
labels = c("No","Yes"))
label(df2$uncomfortable_touching) <- "Uncomfortably touched in intimate parts"
df2$genital_rubbing <- factor(df2$se2, levels = c(0,1),
labels = c("No","Yes"))
label(df2$genital_rubbing) <- "Someone rubbed genitals against you"
df2$forced_to_touch_someone <- factor(df2$se3, levels = c(0,1),
labels = c("No","Yes"))
label(df2$forced_to_touch_someone) <- "Forced to touch intimate parts"
df2$raped_penetrative <- factor(df2$se4, levels = c(0,1),
labels = c("No","Yes"))
label(df2$raped_penetrative) <- "Someone had genital sex against your will"
df2$raped_oral_sex <- factor(df2$se5, levels = c(0,1),
labels = c("No","Yes"))
label(df2$raped_oral_sex) <- "Forced to perform oral sex"
df2$coerced_kiss <- factor(df2$se6, levels = c(0,1),
labels = c("No", "Yes"))
label(df2$coerced_kiss) <- "Forced to kiss someone in a sexual way"
# get general trauma frequencies for both cohorts
df2$natural_disaster <- factor(df2$gt1, levels = c(0,1),
labels = c("No","Yes"))
label(df2$natural_disaster) <- "Exposed to life-threatening natural disaster"
df2$serious_accident <- factor(df2$gt2, levels = c(0,1),
labels = c("No","Yes"))
label(df2$serious_accident) <- "Involved in a serious accident"
df2$serious_injury_illness <- factor(df2$gt3, levels = c(0,1),
labels = c("No","Yes"))
label(df2$serious_injury_illness) <- "Suffered a serious personal injury or illness"
df2$death_illness_caretaker <- factor(df2$gt4, levels = c(0,1),
labels = c("No","Yes"))
label(df2$death_illness_caretaker) <- "Death or serious illness/injury of parent or caretaker"
df2$death_illness_sibling <- factor(df2$gt6, levels = c(0,1),
labels = c("No","Yes"))
label(df2$death_illness_sibling) <- "Death or serious illness/injury of a sibling"
df2$death_illness_friend <- factor(df2$gt7, levels = c(0,1),
labels = c("No","Yes"))
label(df2$death_illness_friend) <- "Death or serious illness/injury of friend"
df2$witnessed_violence <- factor(df2$gt8, levels = c(0,1),
labels = c("No","Yes"))
label(df2$witnessed_violence) <- "Witnessed violence towards others"
df2$family_mental_illness <- factor(df2$gt9, levels = c(0,1),
labels = c("No","Yes"))
label(df2$family_mental_illness) <- "Family mental illness"
df2$caretaker_drugs <- factor(df2$gt10, levels = c(0,1),
labels = c("No","Yes"))
label(df2$caretaker_drugs) <- "Parents or caretakes who use drugs"
df2$witnessed_murder <- factor(df2$gt11, levels = c(0,1),
labels = c("No","Yes"))
label(df2$witnessed_murder) <- "Witnessed someone murdered"
table1(~ war_refugee +
family_killed_war +
kidnapped +
soldiers_torment_village +
current_refugee +
refugee_alone | Cohort, data = df2, overall="Total",
extra.col=list(`p`=pvalue_table), extra.col.pos=3)
| General Maternity Ward (N=90) |
Sexual Violence Ward (N=65) |
p | Total (N=155) |
|
|---|---|---|---|---|
| War refugee | ||||
| No | 26 (28.9%) | 30 (46.2%) | 0.029 | 56 (36.1%) |
| Yes | 64 (71.1%) | 35 (53.8%) | 99 (63.9%) | |
| Family member killed in war | ||||
| No | 82 (91.1%) | 50 (76.9%) | 0.021 | 132 (85.2%) |
| Yes | 8 (8.9%) | 15 (23.1%) | 23 (14.8%) | |
| Kidnapped as a result of the war | ||||
| No | 88 (97.8%) | 62 (95.4%) | 0.65 | 150 (96.8%) |
| Yes | 2 (2.2%) | 3 (4.6%) | 5 (3.2%) | |
| Soldiers tormented village | ||||
| No | 81 (90.0%) | 59 (90.8%) | 1 | 140 (90.3%) |
| Yes | 9 (10.0%) | 6 (9.2%) | 15 (9.7%) | |
| Current refugee | ||||
| No | 89 (98.9%) | 63 (96.9%) | 0.572 | 152 (98.1%) |
| Yes | 1 (1.1%) | 2 (3.1%) | 3 (1.9%) | |
| Was alone as a war refugee | ||||
| No | 80 (88.9%) | 64 (98.5%) | 0.026 | 144 (92.9%) |
| Yes | 10 (11.1%) | 1 (1.5%) | 11 (7.1%) |
table1(~ no_help_at_home +
sick_in_pregnancy +
cried_in_pregnancy +
ashmed_to_cry +
stress_in_pregnancy +
did_not_own_home +
no_choice_reproduction +
insufficient_food +
did_not_want_pregnancy +
no_prenatal_care +
afraid_at_night +
trouble_paying_bills +
emotionally_abused +
physically_abused +
no_wedding +
unhappy_marriage | Cohort, data = df2, overall="Total",
extra.col=list(`p`=pvalue_table), extra.col.pos=3)
| General Maternity Ward (N=90) |
Sexual Violence Ward (N=65) |
p | Total (N=155) |
|
|---|---|---|---|---|
| No help | ||||
| No | 68 (75.6%) | 48 (73.8%) | 0.852 | 116 (74.8%) |
| Yes | 22 (24.4%) | 17 (26.2%) | 39 (25.2%) | |
| Sickness in pregnancy | ||||
| No | 83 (92.2%) | 41 (63.1%) | <0.001 | 124 (80.0%) |
| Yes | 7 (7.8%) | 24 (36.9%) | 31 (20.0%) | |
| Cried during pregnancy | ||||
| No | 37 (41.1%) | 18 (27.7%) | 0.092 | 55 (35.5%) |
| Yes | 53 (58.9%) | 47 (72.3%) | 100 (64.5%) | |
| Ashamed to cry | ||||
| No | 69 (76.7%) | 29 (44.6%) | <0.001 | 98 (63.2%) |
| Yes | 21 (23.3%) | 36 (55.4%) | 57 (36.8%) | |
| Presence of general stressors | ||||
| No | 48 (53.3%) | 19 (29.2%) | 0.003 | 67 (43.2%) |
| Yes | 42 (46.7%) | 46 (70.8%) | 88 (56.8%) | |
| Did not own home | ||||
| No | 25 (27.8%) | 15 (23.1%) | 0.579 | 40 (25.8%) |
| Yes | 65 (72.2%) | 50 (76.9%) | 115 (74.2%) | |
| No choice in reproductive decision | ||||
| No | 61 (67.8%) | 8 (12.3%) | <0.001 | 69 (44.5%) |
| Yes | 29 (32.2%) | 57 (87.7%) | 86 (55.5%) | |
| Did not have enough food during pregnancy | ||||
| No | 72 (80.0%) | 39 (60.0%) | 0.011 | 111 (71.6%) |
| Yes | 18 (20.0%) | 26 (40.0%) | 44 (28.4%) | |
| Did not want to get pregnant | ||||
| No | 62 (68.9%) | 5 (7.7%) | <0.001 | 67 (43.2%) |
| Yes | 28 (31.1%) | 60 (92.3%) | 88 (56.8%) | |
| No prenatal care | ||||
| No | 89 (98.9%) | 61 (93.8%) | 0.162 | 150 (96.8%) |
| Yes | 1 (1.1%) | 4 (6.2%) | 5 (3.2%) | |
| Afraid at night | ||||
| No | 68 (75.6%) | 13 (20.0%) | <0.001 | 81 (52.3%) |
| Yes | 22 (24.4%) | 52 (80.0%) | 74 (47.7%) | |
| Trouble paying bills | ||||
| No | 67 (74.4%) | 22 (33.8%) | <0.001 | 89 (57.4%) |
| Yes | 23 (25.6%) | 43 (66.2%) | 66 (42.6%) | |
| Emotionally abused | ||||
| No | 77 (85.6%) | 52 (80.0%) | 0.39 | 129 (83.2%) |
| Yes | 13 (14.4%) | 13 (20.0%) | 26 (16.8%) | |
| Physically abused | ||||
| No | 78 (86.7%) | 56 (86.2%) | 1 | 134 (86.5%) |
| Yes | 12 (13.3%) | 9 (13.8%) | 21 (13.5%) | |
| Did not have a wedding | ||||
| No | 44 (48.9%) | 3 (4.6%) | <0.001 | 47 (30.3%) |
| Yes | 46 (51.1%) | 62 (95.4%) | 108 (69.7%) | |
| Unhappy marriage | ||||
| No | 75 (83.3%) | 63 (96.9%) | 0.008 | 138 (89.0%) |
| Yes | 15 (16.7%) | 2 (3.1%) | 17 (11.0%) |
table1(~ uncomfortable_touching +
genital_rubbing +
forced_to_touch_someone +
raped_penetrative +
raped_oral_sex +
coerced_kiss | Cohort, data = df2, overall="Total",
extra.col=list(`p`=pvalue_table), extra.col.pos=3)
| General Maternity Ward (N=90) |
Sexual Violence Ward (N=65) |
p | Total (N=155) |
|
|---|---|---|---|---|
| Uncomfortably touched in intimate parts | ||||
| No | 72 (80.0%) | 46 (70.8%) | 0.189 | 118 (76.1%) |
| Yes | 18 (20.0%) | 19 (29.2%) | 37 (23.9%) | |
| Someone rubbed genitals against you | ||||
| No | 76 (84.4%) | 49 (75.4%) | 0.216 | 125 (80.6%) |
| Yes | 14 (15.6%) | 16 (24.6%) | 30 (19.4%) | |
| Forced to touch intimate parts | ||||
| No | 78 (86.7%) | 56 (86.2%) | 1 | 134 (86.5%) |
| Yes | 12 (13.3%) | 9 (13.8%) | 21 (13.5%) | |
| Someone had genital sex against your will | ||||
| No | 80 (88.9%) | 8 (12.3%) | <0.001 | 88 (56.8%) |
| Yes | 10 (11.1%) | 57 (87.7%) | 67 (43.2%) | |
| Forced to perform oral sex | ||||
| No | 83 (92.2%) | 59 (90.8%) | 0.776 | 142 (91.6%) |
| Yes | 7 (7.8%) | 6 (9.2%) | 13 (8.4%) | |
| Forced to kiss someone in a sexual way | ||||
| No | 65 (72.2%) | 60 (92.3%) | 0.002 | 125 (80.6%) |
| Yes | 25 (27.8%) | 5 (7.7%) | 30 (19.4%) |
table1(~ natural_disaster +
serious_accident +
serious_injury_illness +
death_illness_caretaker +
death_illness_sibling +
death_illness_friend +
witnessed_violence +
family_mental_illness +
caretaker_drugs +
witnessed_murder | Cohort, data = df2, overall="Total",
extra.col=list(`p`=pvalue_table), extra.col.pos=3)
| General Maternity Ward (N=90) |
Sexual Violence Ward (N=65) |
p | Total (N=155) |
|
|---|---|---|---|---|
| Exposed to life-threatening natural disaster | ||||
| No | 40 (44.4%) | 27 (41.5%) | 0.745 | 67 (43.2%) |
| Yes | 50 (55.6%) | 38 (58.5%) | 88 (56.8%) | |
| Involved in a serious accident | ||||
| No | 69 (76.7%) | 64 (98.5%) | <0.001 | 133 (85.8%) |
| Yes | 21 (23.3%) | 1 (1.5%) | 22 (14.2%) | |
| Suffered a serious personal injury or illness | ||||
| No | 73 (81.1%) | 59 (90.8%) | 0.112 | 132 (85.2%) |
| Yes | 17 (18.9%) | 6 (9.2%) | 23 (14.8%) | |
| Death or serious illness/injury of parent or caretaker | ||||
| No | 59 (65.6%) | 49 (75.4%) | 0.218 | 108 (69.7%) |
| Yes | 31 (34.4%) | 16 (24.6%) | 47 (30.3%) | |
| Death or serious illness/injury of a sibling | ||||
| No | 82 (91.1%) | 57 (87.7%) | 0.595 | 139 (89.7%) |
| Yes | 8 (8.9%) | 8 (12.3%) | 16 (10.3%) | |
| Death or serious illness/injury of friend | ||||
| No | 85 (94.4%) | 55 (84.6%) | 0.054 | 140 (90.3%) |
| Yes | 5 (5.6%) | 10 (15.4%) | 15 (9.7%) | |
| Witnessed violence towards others | ||||
| No | 22 (24.4%) | 8 (12.3%) | 0.066 | 30 (19.4%) |
| Yes | 68 (75.6%) | 57 (87.7%) | 125 (80.6%) | |
| Family mental illness | ||||
| No | 82 (91.1%) | 63 (96.9%) | 0.194 | 145 (93.5%) |
| Yes | 8 (8.9%) | 2 (3.1%) | 10 (6.5%) | |
| Parents or caretakes who use drugs | ||||
| No | 82 (91.1%) | 63 (96.9%) | 0.194 | 145 (93.5%) |
| Yes | 8 (8.9%) | 2 (3.1%) | 10 (6.5%) | |
| Witnessed someone murdered | ||||
| No | 85 (94.4%) | 61 (93.8%) | 1 | 146 (94.2%) |
| Yes | 5 (5.6%) | 4 (6.2%) | 9 (5.8%) |
mom_all_sig3 <- zhou %>%
select(probeID,gene) %>%
right_join(mom_all_sig, by = c("probeID" = "probe")) %>%
mutate(coefficient = round(coef, digits = 3)) %>%
mutate(p_value = scientific(pval, digits = 3)) %>%
left_join(zhou2, by = c("probeID")) %>%
left_join(illumina2, by = c("probeID")) %>%
select(probeID,coefficient,p_value,exposure,
generation,CpG_chrm,CpG_end,gene,CGIposition,distToTSS,gene_context) %>%
rename(chromosome=CpG_chrm,position = CpG_end) %>%
arrange(exposure)
baby_all_sig3 <- zhou %>%
select(probeID,gene) %>%
right_join(baby_all_sig, by = c("probeID" = "probe")) %>%
mutate(coefficient = round(coef, digits = 3)) %>%
mutate(p_value = scientific(pval, digits = 3)) %>%
left_join(zhou2, by = c("probeID")) %>%
left_join(illumina2, by = c("probeID")) %>%
select(probeID,coefficient,p_value,exposure,
generation,CpG_chrm,CpG_end,gene,CGIposition,distToTSS,gene_context) %>%
rename(chromosome=CpG_chrm,position = CpG_end) %>%
arrange(exposure)
library(ACME) # to get nearest gene function
# Mothers
# General trauma
m_near_1 <- findClosestGene(mom_all_sig3$chromosome[1],mom_all_sig3$position[1],'hg38')
m_near_2 <- findClosestGene(mom_all_sig3$chromosome[2],mom_all_sig3$position[2],'hg38')
m_near_3 <- findClosestGene(mom_all_sig3$chromosome[3],mom_all_sig3$position[3],'hg38')
m_near_4 <- findClosestGene(mom_all_sig3$chromosome[4],mom_all_sig3$position[4],'hg38')
# Sexual Events
m_near_5 <- findClosestGene(mom_all_sig3$chromosome[5],mom_all_sig3$position[5],'hg38')
m_near_6 <- findClosestGene(mom_all_sig3$chromosome[6],mom_all_sig3$position[6],'hg38')
m_near_7 <- findClosestGene(mom_all_sig3$chromosome[7],mom_all_sig3$position[7],'hg38')
m_near_8 <- findClosestGene(mom_all_sig3$chromosome[8],mom_all_sig3$position[8],'hg38')
m_near_9 <- findClosestGene(mom_all_sig3$chromosome[9],mom_all_sig3$position[9],'hg38')
m_near_10 <- findClosestGene(mom_all_sig3$chromosome[10],mom_all_sig3$position[10],'hg38')
m_near_11 <- findClosestGene(mom_all_sig3$chromosome[11],mom_all_sig3$position[11],'hg38')
m_near_12 <- findClosestGene(mom_all_sig3$chromosome[12],mom_all_sig3$position[12],'hg38')
m_near_13 <- findClosestGene(mom_all_sig3$chromosome[13],mom_all_sig3$position[13],'hg38')
# War Trauma
m_near_14 <- findClosestGene(mom_all_sig3$chromosome[14],mom_all_sig3$position[14],'hg38')
m_near_15 <- findClosestGene(mom_all_sig3$chromosome[15],mom_all_sig3$position[15],'hg38')
m_nearest <- rbind(m_near_1,m_near_2,m_near_3,m_near_4,m_near_5,m_near_6,
m_near_7,m_near_8,m_near_9,m_near_10,m_near_11,m_near_12,
m_near_13,m_near_14,m_near_15)
m_nearest_2 <- m_nearest %>%
distinct(geneName, .keep_all = TRUE)
# Newborns
# General Trauma
b_near_1 <- findClosestGene(baby_all_sig3$chromosome[1],baby_all_sig3$position[1],'hg38')
b_near_2 <- findClosestGene(baby_all_sig3$chromosome[2],baby_all_sig3$position[2],'hg38')
# Sexual Events
b_near_3 <- findClosestGene(baby_all_sig3$chromosome[3],baby_all_sig3$position[3],'hg38')
b_near_4 <- findClosestGene(baby_all_sig3$chromosome[4],baby_all_sig3$position[4],'hg38')
b_near_5 <- findClosestGene(baby_all_sig3$chromosome[5],baby_all_sig3$position[5],'hg38')
b_near_6 <- findClosestGene(baby_all_sig3$chromosome[6],baby_all_sig3$position[6],'hg38')
b_near_7 <- findClosestGene(baby_all_sig3$chromosome[7],baby_all_sig3$position[7],'hg38')
b_near_8 <- findClosestGene(baby_all_sig3$chromosome[8],baby_all_sig3$position[8],'hg38')
# War Trauma
b_near_9 <- findClosestGene(baby_all_sig3$chromosome[9],baby_all_sig3$position[9],'hg38')
b_near_10 <- findClosestGene(baby_all_sig3$chromosome[10],baby_all_sig3$position[10],'hg38')
b_near_11 <- findClosestGene(baby_all_sig3$chromosome[11],baby_all_sig3$position[11],'hg38')
b_nearest <- rbind(b_near_1,b_near_2,b_near_3,b_near_4,b_near_5,b_near_6,
b_near_7,b_near_8,b_near_9,b_near_10,b_near_11)
b_nearest_2 <- b_nearest %>%
distinct(geneName, .keep_all = TRUE)
Make tables that list probes and their nearest gene information
mom_near <- cbind.data.frame(mom_all_sig3,m_nearest_2) %>%
relocate(geneName,.after = gene) %>%
rename(nearest_gene = geneName)
datatable(mom_near[c("exposure","probeID","coefficient","p_value","gene","nearest_gene","CGIposition",
"gene_context")],
filter = "top", rownames = FALSE, width = '100%',
options = list(scrollX = TRUE),
caption = htmltools::tags$caption(style = 'caption-side: top; text-align: center; color:black; font-size:200% ;','All significant hits in mothers across four maternal stress EWAS'))
# Table of significant hits for mothers
mom_near %>%
select(exposure,probeID,coefficient,p_value,gene,nearest_gene,CGIposition,gene_context) %>%
arrange(exposure) %>%
kbl() %>%
kable_styling(full_width = FALSE) %>%
kable_material() %>%
pack_rows(group_label = "General Trauma",1,4) %>%
pack_rows(group_label = "Sexual Events",5,13) %>%
pack_rows(group_label = "War Stress",14,15)
| exposure | probeID | coefficient | p_value | gene | nearest_gene | CGIposition | gene_context |
|---|---|---|---|---|---|---|---|
| General Trauma | |||||||
| general_trauma | cg11408019 | 0.003 | 1.10e-10 | NA | RAD54L2 | Island | |
| general_trauma | cg14519777 | 0.006 | 6.87e-08 | CTA-339C12.1;CUX1 | CUX1 | NA | Body;Body;Body |
| general_trauma | cg14282695 | 0.009 | 1.48e-08 | SAMD4A | SAMD4A | NA | Body;Body |
| general_trauma | cg16543391 | -0.001 | 6.25e-08 | EML2;MIR330 | EML2 | N_Shelf | TSS1500;TSS200;TSS200;Body;Body |
| Sexual Events | |||||||
| sexual_events | cg21219607 | -0.002 | 5.50e-08 | PARP15 | PARP15 | NA | TSS1500;TSS1500;Body;Body |
| sexual_events | cg06308131 | -0.004 | 9.71e-10 | MUC4 | MUC20 | S_Shore | Body;Body;Body |
| sexual_events | cg04358942 | -0.001 | 2.86e-08 | RP11-64D24.2 | LOC101928940 | NA | |
| sexual_events | cg23527517 | -0.008 | 1.23e-08 | OTOP3 | OTOP3 | NA | Body |
| sexual_events | cg14859642 | 0.007 | 6.99e-08 | NA | BPIFA2 | NA | |
| sexual_events | cg00489624 | -0.003 | 1.10e-08 | DLGAP4 | DLGAP4 | NA | 5’UTR |
| sexual_events | cg24308336 | -0.007 | 3.79e-08 | ARHGAP40 | ARHGAP40 | N_Shelf | Body |
| sexual_events | cg16765764 | -0.002 | 9.46e-09 | DHX35 | DHX35 | NA | Body;Body;Body |
| sexual_events | cg10897169 | -0.003 | 6.81e-08 | BCAS4 | ADNP | NA | Body;Body;Body |
| War Stress | |||||||
| war_stress | cg13740840 | -0.002 | 5.99e-08 | KIF15;MIR564;TMEM42 | MIR564 | Island | TSS1500;TSS200 |
| war_stress | cg26486174 | 0.013 | 1.61e-08 | NA | HMGA1 | Island | |
baby_near <- cbind.data.frame(baby_all_sig3,b_nearest_2) %>%
relocate(geneName,.after = gene) %>%
rename(nearest_gene = geneName)
datatable(baby_near[c("exposure",
"probeID",
"coefficient",
"p_value",
"gene",
"nearest_gene",
"CGIposition",
"gene_context")],
filter = "top", rownames = FALSE, width = '100%',
options = list(scrollX = TRUE),
caption = htmltools::tags$caption(style = 'caption-side: top; text-align: center; color:black; font-size:200% ;','All significant hits in babies across four maternal stress EWAS'))
# Table of significant hits for mothers
baby_near %>%
select(exposure,probeID,coefficient,p_value,gene,nearest_gene,CGIposition,gene_context) %>%
arrange(exposure) %>%
kbl() %>%
kable_styling(full_width = FALSE) %>%
kable_material() %>%
pack_rows(group_label = "General Trauma",1,2) %>%
pack_rows(group_label = "Sexual Events",3,8) %>%
pack_rows(group_label = "War Stress",9,11)
| exposure | probeID | coefficient | p_value | gene | nearest_gene | CGIposition | gene_context |
|---|---|---|---|---|---|---|---|
| General Trauma | |||||||
| general_trauma | cg24590750 | 0.000 | 4.25e-08 | TBPL1 | TBPL1 | Island | TSS1500;TSS200 |
| general_trauma | cg10783680 | 0.000 | 6.23e-08 | EXOC7 | EXOC7 | Island | 5’UTR;1stExon;5’UTR;1stExon;5’UTR;1stExon;5’UTR;1stExon;1stExon;5’UTR;TSS200 |
| Sexual Events | |||||||
| sexual_events | cg10338475 | 0.006 | 4.74e-11 | CTC-436P18.1;SMIM15 | SMIM15 | S_Shore | TSS1500;Body |
| sexual_events | cg11386818 | 0.002 | 3.97e-09 | SYNCRIP | SYNCRIP | Island | TSS1500;5’UTR;5’UTR;5’UTR;TSS1500;5’UTR |
| sexual_events | cg02176407 | 0.003 | 5.69e-08 | POU3F2 | POU3F2 | Island | 1stExon |
| sexual_events | cg20807701 | 0.003 | 2.80e-11 | DNAJB9;PNPLA8;THAP5 | THAP5 | N_Shore | Body;TSS1500;5’UTR;1stExon |
| sexual_events | cg09631059 | -0.005 | 8.48e-09 | RP11-1391J7.1;TSPAN4 | TSPAN4 | N_Shore | 5’UTR;Body;Body;Body;Body;Body;Body |
| sexual_events | cg06873316 | 0.001 | 4.03e-08 | NELL1 | NELL1 | Island | TSS1500;TSS1500 |
| War Stress | |||||||
| war_stress | cg08985979 | 0.006 | 2.10e-08 | AC108142.1 | TENM3-AS1 | S_Shore | Body |
| war_stress | cg21172322 | 0.018 | 5.88e-10 | BCAT1;RP11-662I13.3 | BCAT1 | N_Shore | Body |
| war_stress | cg00741900 | 0.007 | 2.98e-08 | DIO3;DIO3OS;MIR1247 | DIO3 | Island | 5’UTR;1stExon;TSS1500 |
For epigenetic age analyses, processed data are in the noobBetas object but can be regenerated here:
# rearrange the object to comply with formatting for GEO deposition:
noobBetasGEO <- noobBetas[,c(ncol(noobBetas),1:(ncol(noobBetas)-1))]
colnames(noobBetasGEO)[1] <- "ID_REF"
rm(noobBetas)
# add in the detection p-values below
For epigenetic age analyses. Get raw data here:
if(file.exists(here("data", "congo_mothers_and_babies_eaa_raw_GEO.rds"))) {
raw_eaa_GEO <- readRDS(file = here("data", "congo_mothers_and_babies_eaa_raw_GEO.rds"))
} else {
cl <- makeCluster(16)
registerDoParallel(cl)
clusterExport(cl, c("readIDATpair","noob","getBetas","pOOBAH"))
raw_eaa_GEO <- parLapply(cl,df5$Basename, function(pfx) {
readIDATpair(pfx)
})
stopCluster(cl)
names(raw_eaa_GEO) <- df5$methylation_id
saveRDS(raw_eaa_GEO, file = here("data","congo_mothers_and_babies_eaa_raw_GEO.rds"))
}
For epigenetic age analyses. Get detection p values here:
if(file.exists(here("data", "congo_mothers_and_babies_eaa_raw_pvals_GEO.rds"))) {
pvals <- readRDS(file = here("data", "congo_mothers_and_babies_eaa_raw_pvals_GEO.rds"))
} else {
# How to get the detection p-values
cl <- makeCluster(16)
registerDoParallel(cl)
clusterExport(cl, c("pOOBAH"))
pvals <- do.call(cbind,parLapply(cl,raw_eaa_GEO[1:356], function(x) {
pOOBAH(x, return.pval = TRUE)
}))
stopCluster(cl)
saveRDS(pvals, file = here("data","congo_mothers_and_babies_eaa_raw_pvals_GEO.rds"))
}
pvals <- as.data.frame(pvals)
pvals$ID_REF <- rownames(pvals)
Now combine the processed betas with the detection p-values. Write a .gz compressed .csv file with the required formatting.
detection_pvals <- pvals
colnames(detection_pvals)[-ncol(detection_pvals)] <-
paste(colnames(pvals)[-ncol(detection_pvals)], "Detection Pval", sep = " ")
processed <- merge(noobBetasGEO,detection_pvals, by = "ID_REF")
processed <- processed[,order(colnames(processed))]
# Make ID_REF the first column to appear
processed <- processed %>%
relocate(ID_REF)
# fwrite(processed,
# file = here("output","geo_processed_matrix_v1.csv.gz"),
# row.names = FALSE)
Now format the raw data and combine with the detection p-values
# Select the appropriate signal intensity values for Type I
# and Type II Infinium probes respectively.
unprocessed <- lapply(raw_eaa_GEO, function(x) {
x <- x %>%
mutate(methylated = case_when(
col == "2" ~ UG,
col == "G" ~ MG,
col == "R" ~ MR)) %>%
mutate(unmethylated = case_when(
col == "2" ~ UR,
col == "G" ~ UG,
col == "R" ~ UR)) %>%
select(Probe_ID,methylated,unmethylated) %>%
rename(ID_REF = Probe_ID,
"Methylated signal" = methylated,
"Unmethylated signal" = unmethylated)
})
# Change the column names to match formatting requirements
unprocessed <- imap(unprocessed, ~rename_all(.x, function(z) paste(.y, z, sep = " "))) %>%
bind_cols() %>%
rename(ID_REF = 1) %>%
select(-contains(" ID_REF"))
# Now combine with the detection p-values
unprocessed_detect <- unprocessed %>%
left_join(detection_pvals, by = c("ID_REF"))
# Then reorder the columns as prescribed by GEO
unprocessed_detect <- unprocessed_detect %>%
select(order(colnames(unprocessed_detect),decreasing = TRUE)) %>%
relocate(ID_REF, everything())
# fwrite(unprocessed_detect,
# file = here("output","geo_matrix_signal_v1.csv.gz"),
# row.names = FALSE)
For Epigenetic age analyses. Get metadata sheet here:
eaa_geo_metadata <- df5 %>%
mutate(title = paste("genomic DNA from venous blood",
1:nrow(df5),
sep = " "),
"source name" = paste("venous blood",
1:nrow(df5),
sep = " "),
organism = c("Homo sapiens"),
"idat file 1" = paste(str_sub(Basename,-19,-1),
"_Grn.idat",
sep = ""),
"idat file 2" = paste(str_sub(Basename,-19,-1),
"_Red.idat",
sep = ""),
"characteristics: gender" = if_else(sex == "M","Male","Female"),
molecule = c("genomic DNA"),
label = c("Cy5 and Cy3"),
description = c("Normal venous blood sample"),
platform = c("GPL21145"),
maternal_age = age,
Age = ifelse(tissue == "baby_venous_blood",0,age))
eaa_geo_metadata <- mage %>%
select(methylation_id,DNAmAge,AgeAccelerationResidual) %>%
right_join(eaa_geo_metadata, by = c("methylation_id"))
eaa_geo_metadata$Delivery_Mode <- factor(eaa_geo_metadata$pcsec, levels = c(0,1),
labels = c("vaginal","caesarean section"))
eaa_geo_metadata$Alcohol <- factor(eaa_geo_metadata$palco, levels = c(0,1),
labels = c("No","Yes"))
eaa_geo_metadata$Parity <- factor(eaa_geo_metadata$is_this_your_first_child,
levels = c(0,1),
labels = c("Multigravida","Primigravida"))
eaa_geo_metadata$Gestational_Age <- eaa_geo_metadata$ga_meth/7
eaa_geo_metadata$General_Trauma <- eaa_geo_metadata$gtsum
eaa_geo_metadata$Sexual_Events <- eaa_geo_metadata$setot
eaa_geo_metadata$War_Stress <- eaa_geo_metadata$awar_nr
eaa_geo_metadata$Chronic_Stress <- eaa_geo_metadata$achronic
eaa_geo_metadata$Cohort <- factor(eaa_geo_metadata$cohort, levels = c("C","SV"),
labels = c("General Maternity Ward",
"Sexual Violence Ward"))
eaa_geo_metadata$bmi = eaa_geo_metadata$mwgt/((eaa_geo_metadata$mhgt/100)^2)
eaa_geo_metadata <- eaa_geo_metadata %>%
rename('characteristics: birthweight' = bwgt,
'characteristics: maternal bmi' = bmi,
'characteristics: age' = Age,
'characteristics: dyad' = dyad,
'characteristics: maternal age' = maternal_age,
'characteristics: delivery mode' = Delivery_Mode,
'characteristics: maternal alcohol' = Alcohol,
'characteristics: parity' = Parity,
'characteristics: gestational age' = Gestational_Age,
'characteristics: general trauma' = General_Trauma,
'characteristics: sexual trauma' = Sexual_Events,
'characteristics: war trauma' = War_Stress,
'characteristics: chronic stress' = Chronic_Stress,
'characteristics: cohort' = Cohort,
'characteristics: tissue' = tissue,
'characteristics: replicate' = replicate,
'characteristics: replicate id' = replicate_id,
'characteristics: dna methylation age' = DNAmAge,
'characteristics: age acceleration' = AgeAccelerationResidual) %>%
select(methylation_id,title,'source name', organism,
'idat file 1','idat file 2',
'characteristics: gender',
'characteristics: tissue',
'characteristics: age',
'characteristics: birthweight',
'characteristics: dyad',
'characteristics: maternal age',
'characteristics: delivery mode',
'characteristics: maternal alcohol',
'characteristics: parity',
'characteristics: gestational age',
'characteristics: general trauma',
'characteristics: sexual trauma',
'characteristics: war trauma',
'characteristics: chronic stress',
'characteristics: cohort',
'characteristics: replicate',
'characteristics: replicate id',
'characteristics: dna methylation age',
'characteristics: age acceleration',
molecule,label,
description, platform)
fwrite(eaa_geo_metadata,
file = here("output","geo_eaa_metadata_v1.csv"),
row.names = FALSE)
For EWAS analyses. Get processed data from the “betas” object generated above and format to GEO requirements. Then add in the detection p values.
rm(processed)
betas$ID_REF <- rownames(betas)
ewas_processed <- merge(betas,detection_pvals, by = "ID_REF")
ewas_processed <- ewas_processed[,order(colnames(ewas_processed))]
# Make ID_REF the first column to appear
ewas_processed <- ewas_processed %>%
relocate(ID_REF)
# fwrite(ewas_processed,
# file = here("output","geo_ewas_processed_matrix_v1.csv.gz"),
# row.names = FALSE)
For EWAS analyses. The raw data are the same as the raw data for epigenetic age analyses. Just write out another file and change the name so that the file is stored appropriately.
# fwrite(unprocessed_detect,
# file = here("output","geo_ewas_matrix_signal_v1.csv.gz"),
# row.names = FALSE)
the metadata for the ewas are the same as for the epigenetic age analyses.
fwrite(eaa_geo_metadata,
file = here("output","geo_ewas_metadata_v1.csv"),
row.names = FALSE)
Don’t forget to write out a codebook explaining column headers in the metadata sheet, which will be the same for both data sets.
sub_names <- colnames(eaa_geo_metadata)
df <- data.frame(matrix(ncol = 2, nrow = 29))
df$X1 <- sub_names
colnames(df) <- c("column header","description")
df$description <- c("unique sample id",
"unique title that describes the sample",
"briefly identify the biological material",
"scientific name of organism from which the biological material was derived",
"the Green .idat file corresponding to the sample",
"the Red .idat file corresponding to the sample",
"the sex of the participant",
"describes whether the tissue is mother or newborn venous blood",
"age of the participant",
"birthweight of the baby in the dyad",
"the recruitment dyad to which the participant belongs",
"the age of the mother in the dyad",
"was the delivery vaginal or cesarean section",
"did the mother drink alcohol during pregnancy",
"is the newborn the mother's first child",
"gestational age in weeks estimated using DNA methylation data",
"score based on the general trauma section of the early trauma inventory short form",
"score based on the sexual abuse section of the early trauma inventory short form",
"score based on an ethnographic measure from a prior publication in Child Development by Darlene Kertes et al. 2016",
"score based on an ethnographic measure from a prior publication in Child Development by Darlene Kertes et al. 2016",
"was the participant recruited through the general maternity program or the sexual abuse survivor program",
"is this sample a replicate",
"a unique id assigned to a sample within replicate groups",
"dna methylation age of the participant",
"age acceleration of the participant",
"type of molecule that was extracted from the biological materials",
"compound used to label the extract",
"basic description of the sample",
"what platform were samples processed on")
fwrite(df,
file = here("output","geo_column_headers_v1.csv"),
row.names = FALSE)